NCBI C++ ToolKit
blast_args.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /* $Id: blast_args.cpp 65629 2014-12-16 15:45:20Z morgulis $
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's offical duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================*/
00025 
00026 /*****************************************************************************
00027 
00028 File name: blast_args.cpp
00029 
00030 Author: Jason Papadopoulos
00031 
00032 ******************************************************************************/
00033 
00034 /** @file blast_args.cpp
00035  * convert blast-related command line
00036  * arguments into blast options
00037 */
00038 
00039 #ifndef SKIP_DOXYGEN_PROCESSING
00040 static char const rcsid[] = "$Id: blast_args.cpp 65629 2014-12-16 15:45:20Z morgulis $";
00041 #endif
00042 
00043 #include <ncbi_pch.hpp>
00044 #include <algo/blast/api/version.hpp>
00045 #include <algo/blast/blastinput/blast_args.hpp>
00046 #include <algo/blast/api/blast_exception.hpp>
00047 #include <algo/blast/api/blast_aux.hpp>
00048 #include <algo/blast/api/objmgr_query_data.hpp> /* for CObjMgrQueryFactory */
00049 #include <algo/blast/core/blast_nalookup.h>
00050 #include <algo/blast/core/hspfilter_besthit.h>
00051 #include <objects/scoremat/PssmWithParameters.hpp>
00052 #include <util/format_guess.hpp>
00053 #include <objtools/blast/seqdb_reader/seqdb.hpp>
00054 #include <algo/blast/blastinput/blast_input.hpp>    // for CInputException
00055 #include <connect/ncbi_connutil.h>
00056 
00057 #include <algo/blast/api/msa_pssm_input.hpp>    // for CPsiBlastInputClustalW
00058 #include <algo/blast/api/pssm_engine.hpp>       // for CPssmEngine
00059 
00060 BEGIN_NCBI_SCOPE
00061 BEGIN_SCOPE(blast)
00062 USING_SCOPE(objects);
00063 USING_SCOPE(align_format);
00064 
00065 void
00066 IBlastCmdLineArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
00067                                            CBlastOptions& /* options */)
00068 {}
00069 
00070 CProgramDescriptionArgs::CProgramDescriptionArgs(const string& program_name, 
00071                                                  const string& program_desc)
00072     : m_ProgName(program_name), m_ProgDesc(program_desc)
00073 {}
00074 
00075 void
00076 CProgramDescriptionArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00077 {
00078     // program description
00079     arg_desc.SetUsageContext(m_ProgName, m_ProgDesc + " " + 
00080                              CBlastVersion().Print());
00081 }
00082 
00083 CTaskCmdLineArgs::CTaskCmdLineArgs(const set<string>& supported_tasks,
00084                                    const string& default_task)
00085 : m_SupportedTasks(supported_tasks), m_DefaultTask(default_task)
00086 {
00087     _ASSERT( !m_SupportedTasks.empty() );
00088     if ( !m_DefaultTask.empty() ) {
00089         _ASSERT(m_SupportedTasks.find(m_DefaultTask) != m_SupportedTasks.end());
00090     }
00091 }
00092 
00093 void
00094 CTaskCmdLineArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00095 {
00096     arg_desc.SetCurrentGroup("General search options");
00097     if ( !m_DefaultTask.empty() ) {
00098         arg_desc.AddDefaultKey(kTask, "task_name", "Task to execute", 
00099                                CArgDescriptions::eString, m_DefaultTask);
00100     } else {
00101         arg_desc.AddKey(kTask, "task_name", "Task to execute",
00102                         CArgDescriptions::eString);
00103     }
00104     arg_desc.SetConstraint(kTask, new CArgAllowStringSet(m_SupportedTasks));
00105     arg_desc.SetCurrentGroup("");
00106    
00107 }
00108 
00109 void
00110 CTaskCmdLineArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
00111                                           CBlastOptions& /* options */)
00112 {
00113     // N.B.: handling of tasks occurs at the application level to ensure that
00114     // only relevant tasks are added (@sa CBlastnAppArgs)
00115 }
00116 
00117 void
00118 CGenericSearchArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00119 {
00120     arg_desc.SetCurrentGroup("General search options");
00121 
00122     // evalue cutoff
00123     if (!m_IsIgBlast) {
00124         arg_desc.AddDefaultKey(kArgEvalue, "evalue", 
00125                      "Expectation value (E) threshold for saving hits ",
00126                      CArgDescriptions::eDouble,
00127                      NStr::DoubleToString(BLAST_EXPECT_VALUE));
00128     } else if (m_QueryIsProtein) {
00129         arg_desc.AddDefaultKey(kArgEvalue, "evalue", 
00130                      "Expectation value (E) threshold for saving hits ",
00131                      CArgDescriptions::eDouble,
00132                      NStr::DoubleToString(1.0));
00133     } else {
00134         arg_desc.AddDefaultKey(kArgEvalue, "evalue", 
00135                      "Expectation value (E) threshold for saving hits ",
00136                      CArgDescriptions::eDouble,
00137                      NStr::DoubleToString(1e-15));
00138     }
00139 
00140     // word size
00141     // Default values: blastn=11, megablast=28, others=3
00142     if(!m_IsRpsBlast) {
00143         const string description = m_QueryIsProtein
00144             ? "Word size for wordfinder algorithm"
00145             : "Word size for wordfinder algorithm (length of best perfect match)";
00146         arg_desc.AddOptionalKey(kArgWordSize, "int_value", description,
00147                                 CArgDescriptions::eInteger);
00148         arg_desc.SetConstraint(kArgWordSize, m_QueryIsProtein
00149                                ? new CArgAllowValuesGreaterThanOrEqual(2)
00150                                : new CArgAllowValuesGreaterThanOrEqual(4));
00151     }
00152 
00153     if ( !m_IsRpsBlast && !m_IsTblastx) {
00154         // gap open penalty
00155         arg_desc.AddOptionalKey(kArgGapOpen, "open_penalty", 
00156                                 "Cost to open a gap", 
00157                                 CArgDescriptions::eInteger);
00158 
00159         // gap extend penalty
00160         arg_desc.AddOptionalKey(kArgGapExtend, "extend_penalty",
00161                                "Cost to extend a gap", 
00162                                CArgDescriptions::eInteger);
00163     }
00164 
00165 
00166     if (m_ShowPercentIdentity) {
00167         arg_desc.SetCurrentGroup("Restrict search or results");
00168         arg_desc.AddOptionalKey(kArgPercentIdentity, "float_value",
00169                                 "Percent identity",
00170                                 CArgDescriptions::eDouble);
00171         arg_desc.SetConstraint(kArgPercentIdentity,
00172                                new CArgAllow_Doubles(0.0, 100.0));
00173     }
00174 
00175     arg_desc.SetCurrentGroup("Restrict search or results");
00176     arg_desc.AddOptionalKey(kArgQueryCovHspPerc, "float_value",
00177                             "Percent query coverage per hsp",
00178                             CArgDescriptions::eDouble);
00179     arg_desc.SetConstraint(kArgQueryCovHspPerc,
00180                            new CArgAllow_Doubles(0.0, 100.0));
00181 
00182     arg_desc.SetCurrentGroup("Extension options");
00183     // ungapped X-drop
00184     // Default values: blastn=20, megablast=10, others=7
00185     arg_desc.AddOptionalKey(kArgUngappedXDropoff, "float_value", 
00186                             "X-dropoff value (in bits) for ungapped extensions",
00187                             CArgDescriptions::eDouble);
00188 
00189     // Tblastx is ungapped only.
00190     if (!m_IsTblastx) {
00191          // initial gapped X-drop
00192          // Default values: blastn=30, megablast=20, tblastx=0, others=15
00193          arg_desc.AddOptionalKey(kArgGappedXDropoff, "float_value", 
00194                  "X-dropoff value (in bits) for preliminary gapped extensions",
00195                  CArgDescriptions::eDouble);
00196 
00197          // final gapped X-drop
00198          // Default values: blastn/megablast=50, tblastx=0, others=25
00199          arg_desc.AddOptionalKey(kArgFinalGappedXDropoff, "float_value", 
00200                          "X-dropoff value (in bits) for final gapped alignment",
00201                          CArgDescriptions::eDouble);
00202     }
00203 
00204     arg_desc.SetCurrentGroup("Statistical options");
00205     // effective search space
00206     // Default value is the real size
00207     arg_desc.AddOptionalKey(kArgEffSearchSpace, "int_value", 
00208                             "Effective length of the search space",
00209                             CArgDescriptions::eInt8);
00210     arg_desc.SetConstraint(kArgEffSearchSpace, 
00211                            new CArgAllowValuesGreaterThanOrEqual(0));
00212 
00213     arg_desc.AddDefaultKey(kArgMaxHSPsPerSubject, "int_value",
00214                            "Set maximum number of HSPs per subject sequence to save (0 means no limit)",
00215                            CArgDescriptions::eInteger,
00216                            NStr::IntToString(kDfltArgMaxHSPsPerSubject));
00217     arg_desc.SetConstraint(kArgMaxHSPsPerSubject,
00218                            new CArgAllowValuesGreaterThanOrEqual(0));
00219 
00220     arg_desc.AddOptionalKey(kArgSumStats, "bool_value",
00221                             "Use sum statistics",
00222                             CArgDescriptions::eBoolean);
00223 
00224     arg_desc.SetCurrentGroup("");
00225 }
00226 
00227 void
00228 CGenericSearchArgs::ExtractAlgorithmOptions(const CArgs& args, 
00229                                             CBlastOptions& opt)
00230 {
00231     if (args[kArgEvalue]) {
00232         opt.SetEvalueThreshold(args[kArgEvalue].AsDouble());
00233     }
00234 
00235     int gap_open=0, gap_extend=0;
00236     if (args.Exist(kArgMatrixName) && args[kArgMatrixName])
00237          BLAST_GetProteinGapExistenceExtendParams
00238              (args[kArgMatrixName].AsString().c_str(), &gap_open, &gap_extend);
00239 
00240     if (args.Exist(kArgGapOpen) && args[kArgGapOpen]) {
00241         opt.SetGapOpeningCost(args[kArgGapOpen].AsInteger());
00242     }
00243     else if (args.Exist(kArgMatrixName) && args[kArgMatrixName]) {
00244         opt.SetGapOpeningCost(gap_open);
00245     }
00246 
00247     if (args.Exist(kArgGapExtend) && args[kArgGapExtend]) {
00248         opt.SetGapExtensionCost(args[kArgGapExtend].AsInteger());
00249     }
00250     else if (args.Exist(kArgMatrixName) && args[kArgMatrixName]) {
00251         opt.SetGapExtensionCost(gap_extend);
00252     }
00253 
00254     if (args[kArgUngappedXDropoff]) {
00255         opt.SetXDropoff(args[kArgUngappedXDropoff].AsDouble());
00256     }
00257 
00258     if (args.Exist(kArgGappedXDropoff) && args[kArgGappedXDropoff]) {
00259         opt.SetGapXDropoff(args[kArgGappedXDropoff].AsDouble());
00260     }
00261 
00262     if (args.Exist(kArgFinalGappedXDropoff) && args[kArgFinalGappedXDropoff]) {
00263         opt.SetGapXDropoffFinal(args[kArgFinalGappedXDropoff].AsDouble());
00264     }
00265 
00266     if ( args.Exist(kArgWordSize) && args[kArgWordSize]) {
00267         if (m_QueryIsProtein && args[kArgWordSize].AsInteger() > 5)
00268            opt.SetLookupTableType(eCompressedAaLookupTable);
00269         opt.SetWordSize(args[kArgWordSize].AsInteger());
00270     }
00271 
00272     if (args[kArgEffSearchSpace]) {
00273         CNcbiEnvironment env;
00274         env.Set("OLD_FSC", "true");
00275         opt.SetEffectiveSearchSpace(args[kArgEffSearchSpace].AsInt8());
00276     }
00277 
00278     if (args.Exist(kArgPercentIdentity) && args[kArgPercentIdentity]) {
00279         opt.SetPercentIdentity(args[kArgPercentIdentity].AsDouble());
00280     }
00281 
00282     if (args.Exist(kArgQueryCovHspPerc) && args[kArgQueryCovHspPerc]) {
00283         opt.SetQueryCovHspPerc(args[kArgQueryCovHspPerc].AsDouble());
00284     }
00285 
00286     if (args[kArgMaxHSPsPerSubject]) {
00287         const int value = args[kArgMaxHSPsPerSubject].AsInteger();
00288         if (value != kDfltArgMaxHSPsPerSubject) {
00289             opt.SetMaxNumHspPerSequence(value);
00290         }
00291     }
00292 
00293     if (args.Exist(kArgSumStats) && args[kArgSumStats]) {
00294         opt.SetSumStatisticsMode(args[kArgSumStats].AsBoolean());
00295     }
00296 }
00297 
00298 void
00299 CFilteringArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00300 {
00301     arg_desc.SetCurrentGroup("Query filtering options");
00302 
00303     if (m_QueryIsProtein) {
00304         arg_desc.AddDefaultKey(kArgSegFiltering, "SEG_options",
00305                         "Filter query sequence with SEG "
00306                         "(Format: '" + kDfltArgApplyFiltering + "', " +
00307                         "'window locut hicut', or '" + kDfltArgNoFiltering +
00308                         "' to disable)",
00309                         CArgDescriptions::eString, m_FilterByDefault
00310                         ? kDfltArgSegFiltering : kDfltArgNoFiltering);
00311         arg_desc.AddDefaultKey(kArgLookupTableMaskingOnly, "soft_masking",
00312                         "Apply filtering locations as soft masks",
00313                         CArgDescriptions::eBoolean,
00314                         kDfltArgLookupTableMaskingOnlyProt);
00315     } else {
00316         arg_desc.AddDefaultKey(kArgDustFiltering, "DUST_options",
00317                         "Filter query sequence with DUST "
00318                         "(Format: '" + kDfltArgApplyFiltering + "', " + 
00319                         "'level window linker', or '" + kDfltArgNoFiltering +
00320                         "' to disable)",
00321                         CArgDescriptions::eString, m_FilterByDefault
00322                         ? kDfltArgDustFiltering : kDfltArgNoFiltering);
00323         arg_desc.AddOptionalKey(kArgFilteringDb, "filtering_database",
00324                 "BLAST database containing filtering elements (i.e.: repeats)",
00325                 CArgDescriptions::eString);
00326         
00327         arg_desc.AddOptionalKey(kArgWindowMaskerTaxId, "window_masker_taxid",
00328                 "Enable WindowMasker filtering using a Taxonomic ID",
00329                 CArgDescriptions::eInteger);
00330 
00331         arg_desc.AddOptionalKey(kArgWindowMaskerDatabase, "window_masker_db",
00332                 "Enable WindowMasker filtering using this repeats database.",
00333                 CArgDescriptions::eString);
00334 
00335         arg_desc.AddDefaultKey(kArgLookupTableMaskingOnly, "soft_masking",
00336                         "Apply filtering locations as soft masks",
00337                         CArgDescriptions::eBoolean,
00338                         kDfltArgLookupTableMaskingOnlyNucl);
00339     }
00340 
00341     arg_desc.SetCurrentGroup("");
00342 }
00343 
00344 void 
00345 CFilteringArgs::x_TokenizeFilteringArgs(const string& filtering_args, 
00346                                         vector<string>& output) const
00347 {
00348     output.clear();
00349     NStr::Tokenize(filtering_args, " ", output);
00350     if (output.size() != 3) {
00351         NCBI_THROW(CInputException, eInvalidInput,
00352                    "Invalid number of arguments to filtering option");
00353     }
00354 }
00355 
00356 void
00357 CFilteringArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
00358 {
00359     if (args[kArgLookupTableMaskingOnly]) {
00360         opt.SetMaskAtHash(args[kArgLookupTableMaskingOnly].AsBoolean());
00361     }
00362 
00363     vector<string> tokens;
00364 
00365     try {
00366         if (m_QueryIsProtein && args[kArgSegFiltering]) {
00367             const string& seg_opts = args[kArgSegFiltering].AsString();
00368             if (seg_opts == kDfltArgNoFiltering) {
00369                 opt.SetSegFiltering(false);
00370             } else if (seg_opts == kDfltArgApplyFiltering) {
00371                 opt.SetSegFiltering(true);
00372             } else {
00373                 x_TokenizeFilteringArgs(seg_opts, tokens);
00374                 opt.SetSegFilteringWindow(NStr::StringToInt(tokens[0]));
00375                 opt.SetSegFilteringLocut(NStr::StringToDouble(tokens[1]));
00376                 opt.SetSegFilteringHicut(NStr::StringToDouble(tokens[2]));
00377             }
00378         }
00379 
00380         if ( !m_QueryIsProtein && args[kArgDustFiltering]) {
00381             const string& dust_opts = args[kArgDustFiltering].AsString();
00382             if (dust_opts == kDfltArgNoFiltering) {
00383                 opt.SetDustFiltering(false);
00384             } else if (dust_opts == kDfltArgApplyFiltering) {
00385                 opt.SetDustFiltering(true);
00386             } else {
00387                 x_TokenizeFilteringArgs(dust_opts, tokens);
00388                 opt.SetDustFilteringLevel(NStr::StringToInt(tokens[0]));
00389                 opt.SetDustFilteringWindow(NStr::StringToInt(tokens[1]));
00390                 opt.SetDustFilteringLinker(NStr::StringToInt(tokens[2]));
00391             }
00392         }
00393     } catch (const CStringException& e) {
00394         if (e.GetErrCode() == CStringException::eConvert) {
00395             NCBI_THROW(CInputException, eInvalidInput,
00396                        "Invalid input for filtering parameters");
00397         }
00398     }
00399     
00400     int filter_dbs = 0;
00401     
00402     if (args.Exist(kArgFilteringDb) && args[kArgFilteringDb]) {
00403         opt.SetRepeatFilteringDB(args[kArgFilteringDb].AsString().c_str());
00404         filter_dbs++;
00405     }
00406     
00407     if (args.Exist(kArgWindowMaskerTaxId) &&
00408         args[kArgWindowMaskerTaxId]) {
00409         
00410         opt.SetWindowMaskerTaxId
00411             (args[kArgWindowMaskerTaxId].AsInteger());
00412         
00413         filter_dbs++;
00414     }
00415     
00416     if (args.Exist(kArgWindowMaskerDatabase) &&
00417         args[kArgWindowMaskerDatabase]) {
00418         
00419         opt.SetWindowMaskerDatabase
00420             (args[kArgWindowMaskerDatabase].AsString().c_str());
00421         
00422         filter_dbs++;
00423     }
00424     
00425     if (filter_dbs > 1) {
00426         string msg =
00427             string("Please specify at most one of ") + kArgFilteringDb + ", " +
00428             kArgWindowMaskerTaxId + ", or " + kArgWindowMaskerDatabase + ".";
00429         
00430         NCBI_THROW(CInputException, eInvalidInput, msg);
00431     }
00432 }
00433 
00434 void
00435 CWindowSizeArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00436 {
00437     arg_desc.SetCurrentGroup("Extension options");
00438     // 2-hit wordfinder window size
00439     arg_desc.AddOptionalKey(kArgWindowSize, "int_value", 
00440                             "Multiple hits window size, use 0 to specify "
00441                             "1-hit algorithm",
00442                             CArgDescriptions::eInteger);
00443     arg_desc.SetConstraint(kArgWindowSize, 
00444                            new CArgAllowValuesGreaterThanOrEqual(0));
00445     arg_desc.SetCurrentGroup("");
00446 }
00447 
00448 void
00449 CWindowSizeArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
00450 {
00451     if (args[kArgWindowSize]) {
00452         opt.SetWindowSize(args[kArgWindowSize].AsInteger());
00453     } else {
00454         int window = -1;
00455         BLAST_GetSuggestedWindowSize(opt.GetProgramType(), 
00456                                      opt.GetMatrixName(), 
00457                                      &window);
00458         if (window != -1) {
00459             opt.SetWindowSize(window);
00460         }
00461     }
00462 }
00463 
00464 void
00465 COffDiagonalRangeArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00466 {
00467     arg_desc.SetCurrentGroup("Extension options");
00468     // 2-hit wordfinder off diagonal range
00469     arg_desc.AddDefaultKey(kArgOffDiagonalRange, "int_value", 
00470                             "Number of off-diagonals to search for the 2nd hit, "
00471                             "use 0 to turn off",
00472                             CArgDescriptions::eInteger,
00473                             NStr::IntToString(kDfltOffDiagonalRange));
00474     arg_desc.SetConstraint(kArgOffDiagonalRange, 
00475                            new CArgAllowValuesGreaterThanOrEqual(0));
00476     arg_desc.SetCurrentGroup("");
00477 }
00478 
00479 void
00480 COffDiagonalRangeArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
00481 {
00482     if (args[kArgOffDiagonalRange]) {
00483         opt.SetOffDiagonalRange(args[kArgOffDiagonalRange].AsInteger());
00484     } else {
00485         opt.SetOffDiagonalRange(0);
00486     }
00487 }
00488 
00489 // Options specific to rmblastn -RMH-
00490 void
00491 CRMBlastNArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00492 {
00493     arg_desc.SetCurrentGroup("General search options");
00494 
00495     arg_desc.AddDefaultKey(kArgMatrixName, "matrix_name",
00496                            "Scoring matrix name",
00497                            CArgDescriptions::eString,
00498                            string(""));
00499 
00500     arg_desc.AddFlag(kArgComplexityAdj,
00501                      "Use complexity adjusted scoring",
00502                      true);
00503 
00504 
00505     arg_desc.AddDefaultKey(kArgMaskLevel, "int_value",
00506                             "Masklevel - percentage overlap allowed per "
00507                             "query domain [0-101]",
00508                             CArgDescriptions::eInteger,
00509                             kDfltArgMaskLevel);
00510     arg_desc.SetConstraint(kArgMaskLevel,
00511                            new CArgAllowValuesLessThanOrEqual(101));
00512 
00513     arg_desc.SetCurrentGroup("");
00514 }
00515 
00516 // Options specific to rmblastn -RMH-
00517 void
00518 CRMBlastNArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
00519 {
00520     if (args[kArgMatrixName]) {
00521         opt.SetMatrixName(args[kArgMatrixName].AsString().c_str());
00522     }
00523 
00524     opt.SetComplexityAdjMode( args[kArgComplexityAdj] );
00525 
00526     if (args[kArgMaskLevel]) {
00527         opt.SetMaskLevel(args[kArgMaskLevel].AsInteger());
00528     }
00529 
00530     if (args[kArgMinRawGappedScore]) {
00531         opt.SetCutoffScore(args[kArgMinRawGappedScore].AsInteger());
00532     }else if (args[kArgUngappedXDropoff]) {
00533         opt.SetCutoffScore(args[kArgUngappedXDropoff].AsInteger());
00534     }
00535 }
00536 
00537 void
00538 CWordThresholdArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00539 {
00540     arg_desc.SetCurrentGroup("General search options");
00541     // lookup table word score threshold
00542     arg_desc.AddOptionalKey(kArgWordScoreThreshold, "float_value", 
00543                  "Minimum word score such that the word is added to the "
00544                  "BLAST lookup table",
00545                  CArgDescriptions::eDouble);
00546     arg_desc.SetConstraint(kArgWordScoreThreshold, 
00547                            new CArgAllowValuesGreaterThanOrEqual(0));
00548     arg_desc.SetCurrentGroup("");
00549 }
00550 
00551 static bool
00552 s_IsDefaultWordThreshold(EProgram program, double threshold)
00553 {
00554     int word_threshold = static_cast<int>(threshold);
00555     bool retval = true;
00556     if (program == eBlastp &&
00557         word_threshold != BLAST_WORD_THRESHOLD_BLASTP) {
00558         retval = false;
00559     } else if (program == eBlastx &&
00560         word_threshold != BLAST_WORD_THRESHOLD_BLASTX) {
00561         retval = false;
00562     } else if (program == eTblastn &&
00563                word_threshold != BLAST_WORD_THRESHOLD_TBLASTN) {
00564         retval = false;
00565     }
00566     return retval;
00567 }
00568 
00569 void
00570 CWordThresholdArg::ExtractAlgorithmOptions(const CArgs& args, 
00571                                            CBlastOptions& opt)
00572 {
00573     if (args[kArgWordScoreThreshold]) {
00574         opt.SetWordThreshold(args[kArgWordScoreThreshold].AsDouble());
00575     } else if (s_IsDefaultWordThreshold(opt.GetProgram(),
00576                                         opt.GetWordThreshold())) {
00577         double threshold = -1;
00578         BLAST_GetSuggestedThreshold(opt.GetProgramType(),
00579                                     opt.GetMatrixName(),
00580                                     &threshold);
00581         if (threshold != -1) {
00582             opt.SetWordThreshold(threshold);
00583         }
00584     }
00585 }
00586 
00587 void
00588 CMatrixNameArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00589 {
00590     arg_desc.SetCurrentGroup("General search options");
00591     arg_desc.AddOptionalKey(kArgMatrixName, "matrix_name",
00592                            "Scoring matrix name (normally BLOSUM62)",
00593                            CArgDescriptions::eString); 
00594     arg_desc.SetCurrentGroup("");
00595 }
00596 
00597 void
00598 CMatrixNameArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
00599 {
00600     if (args[kArgMatrixName]) {
00601         opt.SetMatrixName(args[kArgMatrixName].AsString().c_str());
00602     }
00603 }
00604 
00605 void
00606 CNuclArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00607 {
00608     // TLM arg_desc.SetCurrentGroup("Nucleotide scoring options");
00609 
00610     arg_desc.SetCurrentGroup("General search options");
00611     // blastn mismatch penalty
00612     arg_desc.AddOptionalKey(kArgMismatch, "penalty", 
00613                            "Penalty for a nucleotide mismatch", 
00614                            CArgDescriptions::eInteger);
00615     arg_desc.SetConstraint(kArgMismatch, 
00616                            new CArgAllowValuesLessThanOrEqual(0));
00617 
00618     // blastn match reward
00619     arg_desc.AddOptionalKey(kArgMatch, "reward", 
00620                            "Reward for a nucleotide match", 
00621                            CArgDescriptions::eInteger); 
00622     arg_desc.SetConstraint(kArgMatch, 
00623                            new CArgAllowValuesGreaterThanOrEqual(0));
00624 
00625 
00626     arg_desc.SetCurrentGroup("Extension options");
00627     arg_desc.AddFlag(kArgNoGreedyExtension,
00628                      "Use non-greedy dynamic programming extension",
00629                      true);
00630 
00631     arg_desc.SetCurrentGroup("");
00632 }
00633 
00634 void
00635 CNuclArgs::ExtractAlgorithmOptions(const CArgs& cmd_line_args,
00636                                    CBlastOptions& options)
00637 {
00638     if (cmd_line_args[kArgMismatch]) {
00639         options.SetMismatchPenalty(cmd_line_args[kArgMismatch].AsInteger());
00640     }
00641     if (cmd_line_args[kArgMatch]) {
00642         options.SetMatchReward(cmd_line_args[kArgMatch].AsInteger());
00643     }
00644 
00645     if (cmd_line_args[kArgNoGreedyExtension]) {
00646         options.SetGapExtnAlgorithm(eDynProgScoreOnly);
00647         options.SetGapTracebackAlgorithm(eDynProgTbck);
00648     }
00649 }
00650 
00651 const string CDiscontiguousMegablastArgs::kTemplType_Coding("coding");
00652 const string CDiscontiguousMegablastArgs::kTemplType_Optimal("optimal");
00653 const string 
00654 CDiscontiguousMegablastArgs::kTemplType_CodingAndOptimal("coding_and_optimal");
00655 
00656 void
00657 CDiscontiguousMegablastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00658 {
00659     arg_desc.SetCurrentGroup("Extension options");
00660     // FIXME: this can be applied to any program, but since it was only offered
00661     // in megablast, we're putting it here 
00662     arg_desc.AddOptionalKey(kArgMinRawGappedScore, "int_value",
00663                             "Minimum raw gapped score to keep an alignment "
00664                             "in the preliminary gapped and traceback stages",
00665                             CArgDescriptions::eInteger);
00666 
00667     arg_desc.SetCurrentGroup("Discontiguous MegaBLAST options");
00668 
00669     arg_desc.AddOptionalKey(kArgDMBTemplateType, "type", 
00670                  "Discontiguous MegaBLAST template type",
00671                  CArgDescriptions::eString);
00672     arg_desc.SetConstraint(kArgDMBTemplateType, &(*new CArgAllow_Strings, 
00673                                                   kTemplType_Coding,
00674                                                   kTemplType_Optimal,
00675                                                   kTemplType_CodingAndOptimal));
00676     arg_desc.SetDependency(kArgDMBTemplateType,
00677                            CArgDescriptions::eRequires,
00678                            kArgDMBTemplateLength);
00679 
00680     arg_desc.AddOptionalKey(kArgDMBTemplateLength, "int_value", 
00681                  "Discontiguous MegaBLAST template length",
00682                  CArgDescriptions::eInteger);
00683     set<int> allowed_values;
00684     allowed_values.insert(16);
00685     allowed_values.insert(18);
00686     allowed_values.insert(21);
00687     arg_desc.SetConstraint(kArgDMBTemplateLength, 
00688                            new CArgAllowIntegerSet(allowed_values));
00689     arg_desc.SetDependency(kArgDMBTemplateLength,
00690                            CArgDescriptions::eRequires,
00691                            kArgDMBTemplateType);
00692 
00693     arg_desc.SetCurrentGroup("");
00694 }
00695 
00696 void
00697 CDiscontiguousMegablastArgs::ExtractAlgorithmOptions(const CArgs& args,
00698                                                      CBlastOptions& options)
00699 {
00700     if (args[kArgMinRawGappedScore]) {
00701         options.SetCutoffScore(args[kArgMinRawGappedScore].AsInteger());
00702     }
00703 
00704     if (args[kArgDMBTemplateType]) {
00705         const string& type = args[kArgDMBTemplateType].AsString();
00706         EDiscWordType temp_type = eMBWordCoding;
00707 
00708         if (type == kTemplType_Coding) {
00709             temp_type = eMBWordCoding;
00710         } else if (type == kTemplType_Optimal) {
00711             temp_type = eMBWordOptimal;
00712         } else if (type == kTemplType_CodingAndOptimal) {
00713             temp_type = eMBWordTwoTemplates;
00714         } else {
00715             abort();
00716         }
00717         options.SetMBTemplateType(static_cast<unsigned char>(temp_type));
00718     }
00719 
00720     if (args[kArgDMBTemplateLength]) {
00721         unsigned char tlen = 
00722             static_cast<unsigned char>(args[kArgDMBTemplateLength].AsInteger());
00723         options.SetMBTemplateLength(tlen);
00724     }
00725 
00726     // FIXME: should the window size be adjusted if this is set?
00727 }
00728 
00729 void
00730 CCompositionBasedStatsArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00731 {
00732     arg_desc.SetCurrentGroup("General search options");
00733     // composition based statistics, keep in sync with ECompoAdjustModes
00734     // documentation in composition_constants.h
00735 
00736     string zero_opt = !m_ZeroOptDescr.empty() ?
00737         (string)"    0 or F or f: " + m_ZeroOptDescr + "\n" :
00738         "    0 or F or f: No composition-based statistics\n";
00739 
00740     string one_opt_insrt = m_Is2and3Supported ? "" : " or T or t";
00741 
00742     string more_opts = m_Is2and3Supported ? 
00743         "    2 or T or t : Composition-based score adjustment as in "
00744         "Bioinformatics 21:902-911,\n"
00745         "    2005, conditioned on sequence properties\n"
00746         "    3: Composition-based score adjustment as in "
00747         "Bioinformatics 21:902-911,\n"
00748         "    2005, unconditionally\n" : "";
00749 
00750     string legend = (string)"Use composition-based statistics:\n"
00751         "    D or d: default (equivalent to " + m_DefaultOpt + " )\n"
00752         + zero_opt
00753         + "    1" + one_opt_insrt + ": Composition-based statistics "
00754         "as in NAR 29:2994-3005, 2001\n"
00755         + more_opts;
00756 
00757     arg_desc.AddDefaultKey(kArgCompBasedStats, "compo", legend,
00758                            CArgDescriptions::eString, m_DefaultOpt);
00759 
00760 
00761     arg_desc.SetCurrentGroup("Miscellaneous options");
00762     // Use Smith-Waterman algorithm in traceback stage
00763     // FIXME: available only for gapped blastp/tblastn, and with
00764     // composition-based statistics
00765     arg_desc.AddFlag(kArgUseSWTraceback, 
00766                      "Compute locally optimal Smith-Waterman alignments?",
00767                      true);
00768     arg_desc.SetCurrentGroup("");
00769 }
00770 
00771 /** 
00772  * @brief Auxiliary function to set the composition based statistics and smith
00773  * waterman options
00774  * 
00775  * @param opt BLAST options object [in|out]
00776  * @param comp_stat_string command line value for composition based statistics
00777  * [in]
00778  * @param smith_waterman_value command line value for determining the use of
00779  * the smith-waterman algorithm [in]
00780  * @param ungapped pointer to the value which determines whether the search
00781  * should be ungapped or not. It is NULL if ungapped searches are not
00782  * applicable
00783  * @param is_deltablast is program deltablast [in]
00784  */
00785 static void
00786 s_SetCompositionBasedStats(CBlastOptions& opt,
00787                            const string& comp_stat_string,
00788                            bool smith_waterman_value,
00789                            bool* ungapped)
00790 {
00791     const EProgram program = opt.GetProgram();
00792     if (program == eBlastp || program == eTblastn || 
00793         program == ePSIBlast || program == ePSITblastn ||
00794         program == eRPSBlast ||
00795         program == eBlastx  ||  program == eDeltaBlast) {
00796 
00797         ECompoAdjustModes compo_mode = eNoCompositionBasedStats;
00798     
00799         switch (comp_stat_string[0]) {
00800             case '0': case 'F': case 'f':
00801                 compo_mode = eNoCompositionBasedStats;
00802                 break;
00803             case '1':
00804                 compo_mode = eCompositionBasedStats;
00805                 break;
00806             case 'D': case 'd':
00807                 if (program == eRPSBlast) {
00808                     compo_mode = eNoCompositionBasedStats;
00809                 }
00810                 else if (program == eDeltaBlast) {
00811                     compo_mode = eCompositionBasedStats;
00812                 }
00813                 else {
00814                     compo_mode = eCompositionMatrixAdjust;
00815                 }
00816                 break;
00817             case '2':
00818                 compo_mode = eCompositionMatrixAdjust;
00819                 break;
00820             case '3':
00821                 compo_mode = eCompoForceFullMatrixAdjust;
00822                 break;
00823             case 'T': case 't':
00824                 compo_mode = (program == eRPSBlast || program == eDeltaBlast) ?
00825                     eCompositionBasedStats : eCompositionMatrixAdjust;
00826                 break;
00827         } 
00828 
00829         if(program == ePSITblastn) {
00830             compo_mode = eNoCompositionBasedStats;
00831         }
00832 
00833         if (ungapped && *ungapped && compo_mode != eNoCompositionBasedStats) {
00834             NCBI_THROW(CInputException, eInvalidInput, 
00835                        "Composition-adjusted searched are not supported with "
00836                        "an ungapped search, please add -comp_based_stats F or "
00837                        "do a gapped search");
00838         }
00839 
00840         opt.SetCompositionBasedStats(compo_mode);
00841         if (program == eBlastp &&
00842             compo_mode != eNoCompositionBasedStats &&
00843             tolower(comp_stat_string[1]) == 'u') {
00844             opt.SetUnifiedP(1);
00845         }
00846         opt.SetSmithWatermanMode(smith_waterman_value);
00847     }
00848 }
00849 
00850 void
00851 CCompositionBasedStatsArgs::ExtractAlgorithmOptions(const CArgs& args,
00852                                                     CBlastOptions& opt)
00853 {
00854     if (args[kArgCompBasedStats]) {
00855         auto_ptr<bool> ungapped(args.Exist(kArgUngapped) 
00856             ? new bool(args[kArgUngapped]) : 0);
00857         s_SetCompositionBasedStats(opt, 
00858                                    args[kArgCompBasedStats].AsString(),
00859                                    args[kArgUseSWTraceback],
00860                                    ungapped.get());
00861     }
00862 
00863 }
00864 
00865 void
00866 CGappedArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00867 {
00868     // perform gapped search
00869 #if 0
00870     arg_desc.AddOptionalKey(ARG_GAPPED, "gapped", 
00871                  "Perform gapped alignment (default T, but "
00872                  "not available for tblastx)",
00873                  CArgDescriptions::eBoolean,
00874                  CArgDescriptions::fOptionalSeparator);
00875     arg_desc.AddAlias("-gapped", ARG_GAPPED);
00876 #endif
00877     arg_desc.SetCurrentGroup("Extension options");
00878     arg_desc.AddFlag(kArgUngapped, "Perform ungapped alignment only?", true);
00879     arg_desc.SetCurrentGroup("");
00880 }
00881 
00882 void
00883 CGappedArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& options)
00884 {
00885 #if 0
00886     if (args[ARG_GAPPED] && options.GetProgram() != eTblastx) {
00887         options.SetGappedMode(args[ARG_GAPPED].AsBoolean());
00888     }
00889 #endif
00890     options.SetGappedMode( !args[kArgUngapped] );
00891 }
00892 
00893 void
00894 CLargestIntronSizeArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00895 {
00896     arg_desc.SetCurrentGroup("General search options");
00897     // largest intron length
00898     arg_desc.AddDefaultKey(kArgMaxIntronLength, "length", 
00899                     "Length of the largest intron allowed in a translated "
00900                     "nucleotide sequence when linking multiple distinct "
00901                     "alignments",
00902                     CArgDescriptions::eInteger,
00903                     NStr::IntToString(kDfltArgMaxIntronLength));
00904     arg_desc.SetConstraint(kArgMaxIntronLength,
00905                            new CArgAllowValuesGreaterThanOrEqual(0));
00906     arg_desc.SetCurrentGroup("");
00907 }
00908 
00909 void
00910 CLargestIntronSizeArgs::ExtractAlgorithmOptions(const CArgs& args,
00911                                                 CBlastOptions& opt)
00912 {
00913     if ( !args[kArgMaxIntronLength] ) {
00914         return;
00915     }
00916 
00917     // sum statistics are defauled to be on unless a cmdline option is set
00918     opt.SetLongestIntronLength(args[kArgMaxIntronLength].AsInteger());
00919    
00920 }
00921 
00922 void
00923 CFrameShiftArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00924 {
00925     arg_desc.SetCurrentGroup("General search options");
00926     // applicable in blastx/tblastn, off by default
00927     arg_desc.AddOptionalKey(kArgFrameShiftPenalty, "frameshift",
00928                             "Frame shift penalty (for use with out-of-frame "
00929                             "gapped alignment in blastx or tblastn, default "
00930                             "ignored)",
00931                             CArgDescriptions::eInteger);
00932     arg_desc.SetConstraint(kArgFrameShiftPenalty, 
00933                            new CArgAllowValuesGreaterThanOrEqual(1));
00934     arg_desc.SetDependency(kArgFrameShiftPenalty, CArgDescriptions::eExcludes,kArgUngapped);
00935     arg_desc.SetCurrentGroup("");
00936 }
00937 
00938 void
00939 CFrameShiftArgs::ExtractAlgorithmOptions(const CArgs& args,
00940                                          CBlastOptions& opt)
00941 {
00942     if (args[kArgFrameShiftPenalty]) {
00943         if (args[kArgCompBasedStats]) {
00944             string cbs = args[kArgCompBasedStats].AsString();
00945 
00946             if ((cbs[0] != '0' )&& (cbs[0] != 'F')  &&  (cbs[0] != 'f')) {
00947                 NCBI_THROW(CInputException, eInvalidInput,
00948                        "Composition-adjusted searches are not supported with "
00949                        "Out-Of-Frame option, please add -comp_based_stats F ");
00950             }
00951         }
00952 
00953         opt.SetOutOfFrameMode();
00954         opt.SetFrameShiftPenalty(args[kArgFrameShiftPenalty].AsInteger());
00955     }
00956 }
00957 
00958 /// Auxiliary class to validate the genetic code input
00959 class CArgAllowGeneticCodeInteger : public CArgAllow
00960 {
00961 protected:
00962      /// Overloaded method from CArgAllow
00963      virtual bool Verify(const string& value) const {
00964          static int gcs[] = {1,2,3,4,5,6,9,10,11,12,13,14,15,16,21,22,23,24,25};
00965          static const set<int> genetic_codes(gcs, gcs+sizeof(gcs)/sizeof(*gcs));
00966          const int val = NStr::StringToInt(value);
00967          return (genetic_codes.find(val) != genetic_codes.end());
00968      }
00969 
00970      /// Overloaded method from CArgAllow
00971      virtual string GetUsage(void) const {
00972          return "values between: 1-6, 9-16, 21-25";
00973      }
00974 };
00975 
00976 void
00977 CGeneticCodeArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00978 {
00979     if (m_Target == eQuery) {
00980         arg_desc.SetCurrentGroup("Input query options");
00981         // query genetic code
00982         arg_desc.AddDefaultKey(kArgQueryGeneticCode, "int_value", 
00983                                "Genetic code to use to translate query (see user manual for details)\n",
00984                                CArgDescriptions::eInteger,
00985                                NStr::IntToString(BLAST_GENETIC_CODE));
00986         arg_desc.SetConstraint(kArgQueryGeneticCode,
00987                                new CArgAllowGeneticCodeInteger());
00988     } else {
00989         arg_desc.SetCurrentGroup("General search options");
00990         // DB genetic code
00991         arg_desc.AddDefaultKey(kArgDbGeneticCode, "int_value", 
00992                                "Genetic code to use to translate "
00993                                "database/subjects (see user manual for details)\n",
00994                                CArgDescriptions::eInteger,
00995                                NStr::IntToString(BLAST_GENETIC_CODE));
00996         arg_desc.SetConstraint(kArgDbGeneticCode,
00997                                new CArgAllowGeneticCodeInteger());
00998 
00999     }
01000     arg_desc.SetCurrentGroup("");
01001 }
01002 
01003 void
01004 CGeneticCodeArgs::ExtractAlgorithmOptions(const CArgs& args,
01005                                           CBlastOptions& opt)
01006 {
01007     const EProgram program = opt.GetProgram();
01008 
01009     if (m_Target == eQuery && args[kArgQueryGeneticCode]) {
01010         opt.SetQueryGeneticCode(args[kArgQueryGeneticCode].AsInteger());
01011     }
01012   
01013     if (m_Target == eDatabase && args[kArgDbGeneticCode] &&
01014         (program == eTblastn || program == eTblastx) ) {
01015         opt.SetDbGeneticCode(args[kArgDbGeneticCode].AsInteger());
01016     }
01017 }
01018 
01019 void
01020 CGapTriggerArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01021 {
01022     arg_desc.SetCurrentGroup("Extension options");
01023 
01024     const double default_value = m_QueryIsProtein
01025         ? BLAST_GAP_TRIGGER_PROT : BLAST_GAP_TRIGGER_NUCL;
01026     arg_desc.AddDefaultKey(kArgGapTrigger, "float_value", 
01027                            "Number of bits to trigger gapping",
01028                            CArgDescriptions::eDouble,
01029                            NStr::DoubleToString(default_value));
01030     arg_desc.SetCurrentGroup("");
01031 }
01032 
01033 void
01034 CGapTriggerArgs::ExtractAlgorithmOptions(const CArgs& args,
01035                                          CBlastOptions& opt)
01036 {
01037     if (args[kArgGapTrigger]) {
01038         opt.SetGapTrigger(args[kArgGapTrigger].AsDouble());
01039     }
01040 }
01041 
01042 void
01043 CPssmEngineArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01044 {
01045     arg_desc.SetCurrentGroup("PSSM engine options");
01046 
01047     // Pseudo count
01048     arg_desc.AddDefaultKey(kArgPSIPseudocount, "pseudocount",
01049                            "Pseudo-count value used when constructing PSSM",
01050                            CArgDescriptions::eInteger,
01051                            NStr::IntToString(PSI_PSEUDO_COUNT_CONST));
01052 
01053     if (m_IsDeltaBlast) {
01054         arg_desc.AddDefaultKey(kArgDomainInclusionEThreshold, "ethresh",
01055                                "E-value inclusion threshold for alignments "
01056                                "with conserved domains",
01057                                CArgDescriptions::eDouble,
01058                                NStr::DoubleToString(DELTA_INCLUSION_ETHRESH));
01059     }
01060 
01061     // Evalue inclusion threshold
01062     arg_desc.AddDefaultKey(kArgPSIInclusionEThreshold, "ethresh", 
01063                    "E-value inclusion threshold for pairwise alignments", 
01064                    CArgDescriptions::eDouble,
01065                    NStr::DoubleToString(PSI_INCLUSION_ETHRESH));
01066 
01067     arg_desc.SetCurrentGroup("");
01068 }
01069 
01070 void
01071 CPssmEngineArgs::ExtractAlgorithmOptions(const CArgs& args,
01072                                          CBlastOptions& opt)
01073 {
01074     if (args[kArgPSIPseudocount]) {
01075         opt.SetPseudoCount(args[kArgPSIPseudocount].AsInteger());
01076     }
01077 
01078     if (args[kArgPSIInclusionEThreshold]) {
01079         opt.SetInclusionThreshold(args[kArgPSIInclusionEThreshold].AsDouble());
01080     }
01081 
01082     if (args.Exist(kArgDomainInclusionEThreshold)
01083         && args[kArgDomainInclusionEThreshold]) {
01084 
01085         opt.SetDomainInclusionThreshold(
01086                              args[kArgDomainInclusionEThreshold].AsDouble());
01087     }
01088 }
01089 
01090 void
01091 CPsiBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01092 {
01093 
01094     if (m_DbTarget == eNucleotideDb) {
01095         arg_desc.SetCurrentGroup("PSI-TBLASTN options");
01096 
01097         // PSI-tblastn checkpoint
01098         arg_desc.AddOptionalKey(kArgPSIInputChkPntFile, "psi_chkpt_file", 
01099                                 "PSI-TBLASTN checkpoint file",
01100                                 CArgDescriptions::eInputFile);
01101         arg_desc.SetDependency(kArgPSIInputChkPntFile,
01102                                 CArgDescriptions::eExcludes,
01103                                 kArgRemote);
01104     } else {
01105         arg_desc.SetCurrentGroup("PSI-BLAST options");
01106 
01107         // Number of iterations
01108         arg_desc.AddDefaultKey(kArgPSINumIterations, "int_value",
01109                                "Number of iterations to perform (0 means run "
01110                                "until convergence)", CArgDescriptions::eInteger,
01111                                NStr::IntToString(kDfltArgPSINumIterations));
01112         arg_desc.SetConstraint(kArgPSINumIterations, 
01113                                new CArgAllowValuesGreaterThanOrEqual(0));
01114         arg_desc.SetDependency(kArgPSINumIterations,
01115                                CArgDescriptions::eExcludes,
01116                                kArgRemote);
01117         // checkpoint file
01118         arg_desc.AddOptionalKey(kArgPSIOutputChkPntFile, "checkpoint_file",
01119 
01120                                 "File name to store checkpoint file",
01121                                 CArgDescriptions::eOutputFile);
01122         // ASCII matrix file
01123         arg_desc.AddOptionalKey(kArgAsciiPssmOutputFile, "ascii_mtx_file",
01124                                 "File name to store ASCII version of PSSM",
01125                                 CArgDescriptions::eOutputFile);
01126 
01127         if (!m_IsDeltaBlast) {
01128             vector<string> msa_exclusions;
01129             msa_exclusions.push_back(kArgPSIInputChkPntFile);
01130             msa_exclusions.push_back(kArgQuery);
01131             msa_exclusions.push_back(kArgQueryLocation);
01132             // pattern and MSA is not supported
01133             msa_exclusions.push_back(kArgPHIPatternFile);   
01134             arg_desc.SetCurrentGroup("");
01135             arg_desc.SetCurrentGroup("");
01136 
01137             // MSA restart file
01138             arg_desc.SetCurrentGroup("PSSM engine options");
01139             arg_desc.AddOptionalKey(kArgMSAInputFile, "align_restart",
01140                                     "File name of multiple sequence alignment to "
01141                                     "restart PSI-BLAST",
01142                                     CArgDescriptions::eInputFile);
01143             ITERATE(vector<string>, exclusion, msa_exclusions) {
01144                 arg_desc.SetDependency(kArgMSAInputFile,
01145                                        CArgDescriptions::eExcludes,
01146                                        *exclusion);
01147             }
01148 
01149             arg_desc.AddOptionalKey(kArgMSAMasterIndex, "index",
01150                                     "Ordinal number (1-based index) of the sequence"
01151                                     " to use as a master in the multiple sequence "
01152                                     "alignment. If not provided, the first sequence"
01153                                     " in the multiple sequence alignment will be "
01154                                     "used", CArgDescriptions::eInteger);
01155             arg_desc.SetConstraint(kArgMSAMasterIndex, 
01156                                    new CArgAllowValuesGreaterThanOrEqual(1));
01157             ITERATE(vector<string>, exclusion, msa_exclusions) {
01158                 arg_desc.SetDependency(kArgMSAMasterIndex,
01159                                        CArgDescriptions::eExcludes,
01160                                        *exclusion);
01161             }
01162             arg_desc.SetDependency(kArgMSAMasterIndex,
01163                                    CArgDescriptions::eRequires,
01164                                    kArgMSAInputFile);
01165             arg_desc.SetDependency(kArgMSAMasterIndex,
01166                                    CArgDescriptions::eExcludes,
01167                                    kArgIgnoreMsaMaster);
01168 
01169             arg_desc.AddFlag(kArgIgnoreMsaMaster, 
01170                              "Ignore the master sequence when creating PSSM", true);
01171 
01172             vector<string> ignore_pssm_master_exclusions;
01173             ignore_pssm_master_exclusions.push_back(kArgMSAMasterIndex);
01174             ignore_pssm_master_exclusions.push_back(kArgPSIInputChkPntFile);
01175             ignore_pssm_master_exclusions.push_back(kArgQuery);
01176             ignore_pssm_master_exclusions.push_back(kArgQueryLocation);
01177             ITERATE(vector<string>, exclusion, msa_exclusions) {
01178                 arg_desc.SetDependency(kArgIgnoreMsaMaster,
01179                                        CArgDescriptions::eExcludes,
01180                                        *exclusion);
01181             }
01182             arg_desc.SetDependency(kArgIgnoreMsaMaster,
01183                                    CArgDescriptions::eRequires,
01184                                    kArgMSAInputFile);
01185 
01186             // PSI-BLAST checkpoint
01187             arg_desc.AddOptionalKey(kArgPSIInputChkPntFile, "psi_chkpt_file", 
01188                                     "PSI-BLAST checkpoint file",
01189                                     CArgDescriptions::eInputFile);
01190         }
01191     }
01192 
01193     if (!m_IsDeltaBlast) {
01194         arg_desc.SetDependency(kArgPSIInputChkPntFile,
01195                                CArgDescriptions::eExcludes,
01196                                kArgQuery);
01197         arg_desc.SetDependency(kArgPSIInputChkPntFile,
01198                                CArgDescriptions::eExcludes,
01199                                kArgQueryLocation);
01200     }
01201     arg_desc.SetCurrentGroup("");
01202 }
01203 
01204 CRef<CPssmWithParameters>
01205 CPsiBlastArgs::x_CreatePssmFromMsa(CNcbiIstream& input_stream, 
01206                                    CBlastOptions& opt, bool save_ascii_pssm, 
01207                                    unsigned int msa_master_idx,
01208                                    bool ignore_pssm_tmplt_seq)
01209 {
01210     // FIXME get these from CBlastOptions
01211     CPSIBlastOptions psiblast_opts;
01212     PSIBlastOptionsNew(&psiblast_opts); 
01213     psiblast_opts->nsg_compatibility_mode = ignore_pssm_tmplt_seq;
01214 
01215     CPSIDiagnosticsRequest diags(PSIDiagnosticsRequestNewEx(save_ascii_pssm));
01216     CPsiBlastInputClustalW pssm_input(input_stream, *psiblast_opts,
01217                                       opt.GetMatrixName(), diags, NULL, 0,
01218                                       opt.GetGapOpeningCost(),
01219                                       opt.GetGapExtensionCost(),
01220                                       msa_master_idx);
01221     CPssmEngine pssm_engine(&pssm_input);
01222     return pssm_engine.Run();
01223 }
01224 
01225 void
01226 CPsiBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
01227                                        CBlastOptions& opt)
01228 {
01229     if (m_DbTarget == eProteinDb) {
01230         if (args[kArgPSINumIterations]) {
01231             if(m_NumIterations == 1)
01232                 m_NumIterations = args[kArgPSINumIterations].AsInteger();
01233         }
01234         if (args.Exist(kArgPSIOutputChkPntFile) &&
01235             args[kArgPSIOutputChkPntFile]) {
01236             m_CheckPointOutput.Reset
01237                 (new CAutoOutputFileReset
01238                  (args[kArgPSIOutputChkPntFile].AsString())); 
01239         }
01240         const bool kSaveAsciiPssm = args[kArgAsciiPssmOutputFile];
01241         if (kSaveAsciiPssm) {
01242             m_AsciiMatrixOutput.Reset
01243                 (new CAutoOutputFileReset
01244                  (args[kArgAsciiPssmOutputFile].AsString()));
01245         }
01246         if (args.Exist(kArgMSAInputFile) && args[kArgMSAInputFile]) {
01247             CNcbiIstream& in = args[kArgMSAInputFile].AsInputFile();
01248             unsigned int msa_master_idx = 0;
01249             if (args[kArgMSAMasterIndex]) {
01250                 msa_master_idx = args[kArgMSAMasterIndex].AsInteger() - 1;
01251             }
01252             m_Pssm = x_CreatePssmFromMsa(in, opt, kSaveAsciiPssm,
01253                                          msa_master_idx, 
01254                                          args[kArgIgnoreMsaMaster]);
01255         }
01256         if (!m_IsDeltaBlast) {
01257             opt.SetIgnoreMsaMaster(args[kArgIgnoreMsaMaster]);
01258         }
01259     }
01260 
01261     if (args.Exist(kArgPSIInputChkPntFile) && args[kArgPSIInputChkPntFile]) {
01262         CNcbiIstream& in = args[kArgPSIInputChkPntFile].AsInputFile();
01263         _ASSERT(m_Pssm.Empty());
01264         m_Pssm.Reset(new CPssmWithParameters);
01265         try {
01266             switch (CFormatGuess().Format(in)) {
01267             case CFormatGuess::eBinaryASN:
01268                 in >> MSerial_AsnBinary >> *m_Pssm;
01269                 break;
01270             case CFormatGuess::eTextASN:
01271                 in >> MSerial_AsnText >> *m_Pssm;
01272                 break;
01273             case CFormatGuess::eXml:
01274                 in >> MSerial_Xml >> *m_Pssm;
01275                 break;
01276             default:
01277                 NCBI_THROW(CInputException, eInvalidInput, 
01278                            "Unsupported format for PSSM");
01279             }
01280         } catch (const CSerialException&) {
01281             string msg("Unrecognized format for PSSM in ");
01282             msg += args[kArgPSIInputChkPntFile].AsString() + " (must be ";
01283             msg += "PssmWithParameters)";
01284             NCBI_THROW(CInputException, eInvalidInput, msg);
01285         }
01286         _ASSERT(m_Pssm.NotEmpty());
01287     }
01288 }
01289 
01290 void
01291 CPhiBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01292 {
01293     arg_desc.SetCurrentGroup("PHI-BLAST options");
01294 
01295     arg_desc.AddOptionalKey(kArgPHIPatternFile, "file",
01296                             "File name containing pattern to search",
01297                             CArgDescriptions::eInputFile);
01298     arg_desc.SetDependency(kArgPHIPatternFile,
01299                            CArgDescriptions::eExcludes,
01300                            kArgPSIInputChkPntFile);
01301 
01302     arg_desc.SetCurrentGroup("");
01303 }
01304 
01305 void
01306 CPhiBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
01307                                        CBlastOptions& opt)
01308 {
01309     if (args.Exist(kArgPHIPatternFile) && args[kArgPHIPatternFile]) {
01310         CNcbiIstream& in = args[kArgPHIPatternFile].AsInputFile();
01311         in.clear();
01312         in.seekg(0);
01313         char buffer[4096];
01314         string line;
01315         string pattern;
01316         string name;
01317         while (in.getline(buffer, 4096)) {
01318            line = buffer;
01319            string ltype = line.substr(0, 2);
01320            if (ltype == "ID") 
01321              name = line.substr(5);
01322            else if (ltype == "PA")
01323              pattern = line.substr(5);
01324         }
01325         if (!pattern.empty())
01326             opt.SetPHIPattern(pattern.c_str(), 
01327                (Blast_QueryIsNucleotide(opt.GetProgramType())
01328                ? true : false));
01329         else
01330             NCBI_THROW(CInputException, eInvalidInput, 
01331                        "PHI pattern not read");
01332     }
01333 }
01334 
01335 void
01336 CDeltaBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01337 {
01338     arg_desc.SetCurrentGroup("DELTA-BLAST options");
01339 
01340     arg_desc.AddDefaultKey(kArgRpsDb, "database_name", "BLAST domain "
01341                            "database name", CArgDescriptions::eString,
01342                            kDfltArgRpsDb);
01343 
01344     arg_desc.AddFlag(kArgShowDomainHits, "Show domain hits");
01345     arg_desc.SetDependency(kArgShowDomainHits, CArgDescriptions::eExcludes,
01346                            kArgRemote);
01347     arg_desc.SetDependency(kArgShowDomainHits, CArgDescriptions::eExcludes,
01348                            kArgSubject);
01349 }
01350 
01351 void
01352 CDeltaBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
01353                                          CBlastOptions& opt)
01354 {
01355     m_DomainDb.Reset(new CSearchDatabase(args[kArgRpsDb].AsString(),
01356                                          CSearchDatabase::eBlastDbIsProtein));
01357 
01358     if (args.Exist(kArgShowDomainHits)) {
01359         m_ShowDomainHits = args[kArgShowDomainHits];
01360     }
01361 }
01362 
01363 void
01364 CIgBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01365 {
01366     arg_desc.SetCurrentGroup("Ig-BLAST options");
01367     const static char suffix[] = "VDJ";
01368     const static int df_num_align[3] = {3,3,3};
01369     int num_genes = (m_IsProtein) ? 1 : 3;
01370 
01371     for (int gene=0; gene<num_genes; ++gene) {
01372         // Subject sequence input
01373         /* TODO disabled for now
01374         string arg_sub = kArgGLSubject;
01375         arg_sub.push_back(suffix[gene]);
01376         arg_desc.AddOptionalKey(arg_sub , "filename",
01377                             "Germline subject sequence to align",
01378                             CArgDescriptions::eInputFile);
01379         */
01380         // Germline database file name
01381         string arg_db = kArgGLDatabase;
01382         arg_db.push_back(suffix[gene]);
01383         arg_desc.AddOptionalKey(arg_db, "germline_database_name",
01384                             "Germline database name",
01385                             CArgDescriptions::eString);
01386         //arg_desc.SetDependency(arg_db, CArgDescriptions::eExcludes, arg_sub);
01387         // Number of alignments to show
01388         string arg_na = kArgGLNumAlign;
01389         arg_na.push_back(suffix[gene]);
01390         arg_desc.AddDefaultKey(arg_na, "int_value",
01391                             "Number of Germline sequences to show alignments for",
01392                             CArgDescriptions::eInteger,
01393                             NStr::IntToString(df_num_align[gene]));
01394         //arg_desc.SetConstraint(arg_na,
01395         //                   new CArgAllowValuesBetween(0, 4));
01396         // Seqidlist
01397         arg_desc.AddOptionalKey(arg_db + "_seqidlist", "filename",
01398                             "Restrict search of germline database to list of SeqIds's",
01399                             CArgDescriptions::eString);
01400     }
01401 
01402     if (!m_IsProtein) {
01403         arg_desc.AddOptionalKey(kArgGLChainType, "filename",
01404                             "File containing the coding frame start positions for sequences in germline J database",
01405                             CArgDescriptions::eString);
01406         
01407         arg_desc.AddOptionalKey(kArgMinDMatch, "min_D_match",
01408                                 "Required minimal number of D gene matches ",
01409                                 CArgDescriptions::eInteger);
01410         arg_desc.SetConstraint(kArgMinDMatch, 
01411                                new CArgAllowValuesGreaterThanOrEqual(5));
01412         
01413         arg_desc.AddDefaultKey(kArgDPenalty, "D_penalty",
01414                                 "Penalty for a nucleotide mismatch in D gene",
01415                                 CArgDescriptions::eInteger, "-4");
01416 
01417         arg_desc.SetConstraint(kArgDPenalty, 
01418                                new CArgAllowValuesBetween(-6, 0));
01419     }
01420 
01421     arg_desc.AddDefaultKey(kArgGLOrigin, "germline_origin",
01422                             "The organism for your query sequence (i.e., human, mouse, etc.)",
01423                             CArgDescriptions::eString, "human");
01424     arg_desc.SetConstraint(kArgGLOrigin, &(*new CArgAllow_Strings, "human", "mouse", "rat", "rabbit", "rhesus_monkey"));
01425 
01426     arg_desc.AddDefaultKey(kArgGLDomainSystem, "domain_system",
01427                             "Domain system to be used for segment annotation",
01428                             CArgDescriptions::eString, "kabat");
01429     arg_desc.SetConstraint(kArgGLDomainSystem, &(*new CArgAllow_Strings, "kabat", "imgt"));
01430 
01431     arg_desc.AddDefaultKey(kArgIgSeqType, "sequence_type",
01432                             "Specify Ig or T cell receptor sequence",
01433                             CArgDescriptions::eString, "Ig");
01434     arg_desc.SetConstraint(kArgIgSeqType, &(*new CArgAllow_Strings, "Ig", "TCR"));
01435 
01436 
01437     arg_desc.AddFlag(kArgGLFocusV, "Should the search only be for V segment (effective only for non-germline database search using -db option)?", true);
01438 
01439     if (! m_IsProtein) {
01440         arg_desc.AddFlag(kArgTranslate, "Show translated alignments", true);
01441     }
01442 
01443     arg_desc.SetCurrentGroup("");
01444 }
01445 
01446 static string s_RegisterOMDataLoader(CRef<CSeqDB> db_handle)
01447 {    // the blast formatter requires that the database coexist in
01448     // the same scope with the query sequences
01449     CRef<CObjectManager> om = CObjectManager::GetInstance();                          
01450     CBlastDbDataLoader::RegisterInObjectManager(*om, db_handle, true,                 
01451                         CObjectManager::eDefault,                                     
01452                         CBlastDatabaseArgs::kSubjectsDataLoaderPriority);             
01453     CBlastDbDataLoader::SBlastDbParam param(db_handle);                               
01454     string retval(CBlastDbDataLoader::GetLoaderNameFromArgs(param));                  
01455     _TRACE("Registering " << retval << " at priority " <<                             
01456            (int)CBlastDatabaseArgs::kSubjectsDataLoaderPriority);                     
01457     return retval;                                                                    
01458 }               
01459 
01460 void
01461 CIgBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
01462                                       CBlastOptions& opts)
01463 {
01464     string paths[3];
01465     CNcbiEnvironment env;
01466     CMetaRegistry::SEntry sentry = 
01467             CMetaRegistry::Load("ncbi", CMetaRegistry::eName_RcOrIni);
01468     paths[0] = CDirEntry::NormalizePath(CDir::GetCwd(), eFollowLinks);
01469     paths[1] = CDirEntry::NormalizePath(env.Get("IGDATA"), eFollowLinks);
01470     if (sentry.registry) {
01471        paths[2] = CDirEntry::NormalizePath(sentry.registry->Get("BLAST","IGDATA"), eFollowLinks);
01472     } else {
01473 #if defined(NCBI_OS_DARWIN)
01474        paths[2] = "/usr/local/ncbi/igblast/data";
01475 #else        
01476        paths[2] = paths[0];
01477 #endif        
01478     }
01479 
01480     m_IgOptions.Reset(new CIgBlastOptions());
01481     
01482     CBlastDatabaseArgs::EMoleculeType mol_type = Blast_SubjectIsNucleotide(opts.GetProgramType())
01483         ? CSearchDatabase::eBlastDbIsNucleotide
01484         : CSearchDatabase::eBlastDbIsProtein;
01485 
01486     m_IgOptions->m_IsProtein = (mol_type == CSearchDatabase::eBlastDbIsProtein);
01487     m_IgOptions->m_Origin = args[kArgGLOrigin].AsString();
01488     m_IgOptions->m_DomainSystem = args[kArgGLDomainSystem].AsString();
01489     m_IgOptions->m_FocusV = args.Exist(kArgGLFocusV) ? args[kArgGLFocusV] : false;
01490     m_IgOptions->m_Translate = args.Exist(kArgTranslate) ? args[kArgTranslate] : false;
01491     if (!m_IsProtein) {
01492         string aux_file = (args.Exist(kArgGLChainType) && args[kArgGLChainType])
01493                              ? args[kArgGLChainType].AsString()
01494                              : m_IgOptions->m_Origin + "_gl.aux";
01495         m_IgOptions->m_AuxFilename = aux_file;
01496         for (int i=0; i<3; i++) {
01497             string aux_path = CDirEntry::ConcatPath(paths[i], aux_file);
01498             CDirEntry entry(aux_path);
01499             if (entry.Exists() && entry.IsFile()) {
01500                 m_IgOptions->m_AuxFilename = aux_path;
01501                 break;
01502             }
01503         }
01504     }
01505 
01506     _ASSERT(m_IsProtein == m_IgOptions->m_IsProtein);
01507 
01508     m_Scope.Reset(new CScope(*CObjectManager::GetInstance()));
01509 
01510     // default germline database name for annotation
01511     for (int i=0; i<3; i++) {
01512         string int_data = CDirEntry::ConcatPath(paths[i], "internal_data");
01513         CDirEntry entry(int_data);
01514         if (entry.Exists() && entry.IsDir()) {
01515             m_IgOptions->m_IgDataPath = int_data;
01516             break;
01517         }
01518     }
01519 
01520     m_IgOptions->m_SequenceType = "Ig";
01521     if (args.Exist(kArgIgSeqType) && args[kArgIgSeqType]) {
01522         m_IgOptions->m_SequenceType = args[kArgIgSeqType].AsString();
01523     }
01524 
01525     string df_db_name = CDirEntry::ConcatPath(
01526                         CDirEntry::ConcatPath(m_IgOptions->m_IgDataPath, 
01527                            m_IgOptions->m_Origin), m_IgOptions->m_Origin + 
01528                         ((m_IgOptions->m_SequenceType == "TCR")?"_TR":"") + "_V");
01529     CRef<CSearchDatabase> db(new CSearchDatabase(df_db_name, mol_type));
01530     m_IgOptions->m_Db[3].Reset(new CLocalDbAdapter(*db));
01531     try {
01532         db->GetSeqDb();
01533     } catch(...) {
01534         NCBI_THROW(CInputException, eInvalidInput,
01535            "Germline annotation database " + df_db_name + " could not be found in [internal_data] directory");
01536     }
01537 
01538     m_IgOptions->m_Min_D_match = 5;
01539     if (args.Exist(kArgMinDMatch) && args[kArgMinDMatch]) {
01540         m_IgOptions->m_Min_D_match = args[kArgMinDMatch].AsInteger();
01541     }
01542    
01543     if (args.Exist(kArgDPenalty) && args[kArgDPenalty]) {
01544         m_IgOptions->m_D_penalty = args[kArgDPenalty].AsInteger();
01545     }
01546 
01547 
01548     CRef<CBlastOptionsHandle> opts_hndl;
01549     if (m_IgOptions->m_IsProtein) {
01550         opts_hndl.Reset(CBlastOptionsFactory::Create(eBlastp));
01551     } else {
01552         opts_hndl.Reset(CBlastOptionsFactory::Create(eBlastn));
01553     }
01554 
01555     const static char suffix[] = "VDJ";
01556     int num_genes = (m_IsProtein) ? 1: 3;
01557     for (int gene=0; gene< num_genes; ++gene) {
01558         string arg_sub = kArgGLSubject;
01559         string arg_db  = kArgGLDatabase;
01560         string arg_na  = kArgGLNumAlign;
01561 
01562         arg_sub.push_back(suffix[gene]);
01563         arg_db.push_back(suffix[gene]);
01564         arg_na.push_back(suffix[gene]);
01565 
01566         m_IgOptions->m_NumAlign[gene] = args[arg_na].AsInteger();
01567 
01568         if (args.Exist(arg_sub) && args[arg_sub]) {
01569             CNcbiIstream& subj_input_stream = args[arg_sub].AsInputFile();
01570             TSeqRange subj_range;
01571 
01572             const bool parse_deflines = args.Exist(kArgParseDeflines) 
01573             ? bool(args[kArgParseDeflines])
01574                 : kDfltArgParseDeflines;
01575             const bool use_lcase_masks = args.Exist(kArgUseLCaseMasking)
01576             ? bool(args[kArgUseLCaseMasking])
01577                 : kDfltArgUseLCaseMasking;
01578             CRef<blast::CBlastQueryVector> subjects;
01579             CRef<CScope> scope = ReadSequencesToBlast(subj_input_stream, 
01580                                        m_IgOptions->m_IsProtein,
01581                                        subj_range, parse_deflines,
01582                                        use_lcase_masks, subjects);
01583             m_Scope->AddScope(*scope, 
01584                         CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
01585             CRef<IQueryFactory> sub_seqs(
01586                                 new blast::CObjMgr_QueryFactory(*subjects));
01587             m_IgOptions->m_Db[gene].Reset(new CLocalDbAdapter(
01588                                        sub_seqs, opts_hndl));
01589         } else { 
01590             string gl_db_name = m_IgOptions->m_Origin + "_gl_";
01591             gl_db_name.push_back(suffix[gene]);
01592             string db_name = (args.Exist(arg_db) && args[arg_db]) 
01593                        ?  args[arg_db].AsString() :  gl_db_name;
01594             db.Reset(new CSearchDatabase(db_name, mol_type));
01595 
01596             if (args.Exist(arg_db + "_seqidlist") && args[arg_db + "_seqidlist"]) {
01597                 string fn(SeqDB_ResolveDbPath(args[arg_db + "_seqidlist"].AsString()));
01598                 db->SetGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn,
01599                              CSeqDBFileGiList::eSiList)));
01600             }
01601 
01602             m_IgOptions->m_Db[gene].Reset(new CLocalDbAdapter(*db));
01603             m_Scope->AddDataLoader(s_RegisterOMDataLoader(db->GetSeqDb()));
01604         }
01605     }
01606 }
01607 
01608 void
01609 CQueryOptionsArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01610 {
01611 
01612     arg_desc.SetCurrentGroup("Query filtering options");
01613     // lowercase masking
01614     arg_desc.AddFlag(kArgUseLCaseMasking, 
01615          "Use lower case filtering in query and subject sequence(s)?", true);
01616 
01617     arg_desc.SetCurrentGroup("Input query options");
01618     // query location
01619     arg_desc.AddOptionalKey(kArgQueryLocation, "range", 
01620                             "Location on the query sequence in 1-based offsets "
01621                             "(Format: start-stop)",
01622                             CArgDescriptions::eString);
01623 
01624     if ( !m_QueryCannotBeNucl ) {
01625         // search strands
01626         arg_desc.AddDefaultKey(kArgStrand, "strand", 
01627                          "Query strand(s) to search against database/subject",
01628                          CArgDescriptions::eString, kDfltArgStrand);
01629         arg_desc.SetConstraint(kArgStrand, &(*new CArgAllow_Strings, 
01630                                              kDfltArgStrand, "plus", "minus"));
01631     }
01632 
01633     arg_desc.SetCurrentGroup("Miscellaneous options");
01634     arg_desc.AddFlag(kArgParseDeflines,
01635                  "Should the query and subject defline(s) be parsed?", true);
01636 
01637     arg_desc.SetCurrentGroup("");
01638 }
01639 
01640 void
01641 CQueryOptionsArgs::ExtractAlgorithmOptions(const CArgs& args, 
01642                                            CBlastOptions& opt)
01643 {
01644     // Get the strand
01645     {
01646         m_Strand = eNa_strand_unknown;
01647 
01648         if (!Blast_QueryIsProtein(opt.GetProgramType()) && args[kArgStrand]) {
01649             const string& kStrand = args[kArgStrand].AsString();
01650             if (kStrand == "both") {
01651                 m_Strand = eNa_strand_both;
01652             } else if (kStrand == "plus") {
01653                 m_Strand = eNa_strand_plus;
01654             } else if (kStrand == "minus") {
01655                 m_Strand = eNa_strand_minus;
01656             } else {
01657                 abort();
01658             }
01659         }
01660     }
01661 
01662     // set the sequence range
01663     if (args[kArgQueryLocation]) {
01664         m_Range = ParseSequenceRange(args[kArgQueryLocation].AsString(), 
01665                                      "Invalid specification of query location");
01666     }
01667 
01668     m_UseLCaseMask = static_cast<bool>(args[kArgUseLCaseMasking]);
01669     m_ParseDeflines = static_cast<bool>(args[kArgParseDeflines]);
01670 }
01671 
01672 CBlastDatabaseArgs::CBlastDatabaseArgs(bool request_mol_type /* = false */,
01673                                        bool is_rpsblast /* = false */,
01674                                        bool is_igblast  /* = false */)
01675     : m_RequestMoleculeType(request_mol_type), 
01676       m_IsRpsBlast(is_rpsblast),
01677       m_IsIgBlast(is_igblast),
01678       m_IsProtein(true), 
01679       m_SupportsDatabaseMasking(false)
01680 {}
01681 
01682 bool
01683 CBlastDatabaseArgs::HasBeenSet(const CArgs& args)
01684 {
01685     if ( (args.Exist(kArgDb) && args[kArgDb].HasValue()) ||
01686          (args.Exist(kArgSubject) && args[kArgSubject].HasValue()) ) {
01687         return true;
01688     }
01689     return false;
01690 }
01691 
01692 void
01693 CBlastDatabaseArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01694 {
01695     arg_desc.SetCurrentGroup("General search options");
01696     // database filename
01697     arg_desc.AddOptionalKey(kArgDb, "database_name", "BLAST database name", 
01698                             CArgDescriptions::eString);
01699     arg_desc.SetCurrentGroup("");
01700 
01701     if (m_RequestMoleculeType) {
01702         arg_desc.AddKey(kArgDbType, "database_type", 
01703                         "BLAST database molecule type",
01704                         CArgDescriptions::eString);
01705         arg_desc.SetConstraint(kArgDbType, 
01706                                &(*new CArgAllow_Strings, "prot", "nucl"));
01707     }
01708 
01709     vector<string> database_args;
01710     database_args.push_back(kArgDb);
01711     database_args.push_back(kArgGiList);
01712     database_args.push_back(kArgSeqIdList);
01713     database_args.push_back(kArgNegativeGiList);
01714     if (m_SupportsDatabaseMasking) {
01715         database_args.push_back(kArgDbSoftMask);
01716         database_args.push_back(kArgDbHardMask);
01717     }
01718 
01719     // DB size
01720     arg_desc.SetCurrentGroup("Statistical options");
01721     arg_desc.AddOptionalKey(kArgDbSize, "num_letters", 
01722                             "Effective length of the database ",
01723                             CArgDescriptions::eInt8);
01724 
01725     arg_desc.SetCurrentGroup("Restrict search or results");
01726     // GI list
01727     if (!m_IsRpsBlast) {
01728         arg_desc.AddOptionalKey(kArgGiList, "filename", 
01729                                 "Restrict search of database to list of GI's",
01730                                 CArgDescriptions::eString);
01731         // SeqId list
01732         arg_desc.AddOptionalKey(kArgSeqIdList, "filename", 
01733                                 "Restrict search of database to list of SeqId's",
01734                                 CArgDescriptions::eString);
01735         // Negative GI list
01736         arg_desc.AddOptionalKey(kArgNegativeGiList, "filename", 
01737                                 "Restrict search of database to everything"
01738                                 " except the listed GIs",
01739                                 CArgDescriptions::eString);
01740         arg_desc.SetDependency(kArgGiList, CArgDescriptions::eExcludes, 
01741                                kArgNegativeGiList);
01742         arg_desc.SetDependency(kArgGiList, CArgDescriptions::eExcludes, 
01743                                kArgSeqIdList);
01744         arg_desc.SetDependency(kArgSeqIdList, CArgDescriptions::eExcludes, 
01745                                kArgNegativeGiList);
01746 
01747         // For now, disable pairing -remote with either -gilist or
01748         // -negative_gilist as this is not implemented in the BLAST server
01749         arg_desc.SetDependency(kArgGiList, CArgDescriptions::eExcludes, 
01750                                kArgRemote);
01751         arg_desc.SetDependency(kArgSeqIdList, CArgDescriptions::eExcludes, 
01752                                kArgRemote);
01753         arg_desc.SetDependency(kArgNegativeGiList, CArgDescriptions::eExcludes,
01754                                kArgRemote);
01755     }
01756 
01757     // Entrez Query
01758     arg_desc.AddOptionalKey(kArgEntrezQuery, "entrez_query", 
01759                             "Restrict search with the given Entrez query",
01760                             CArgDescriptions::eString);
01761 
01762     // Entrez query currently requires the -remote option
01763     arg_desc.SetDependency(kArgEntrezQuery, CArgDescriptions::eRequires, 
01764                            kArgRemote);
01765 
01766 
01767 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \
01768      (!defined(NCBI_COMPILER_MIPSPRO)) )
01769     // Masking of database
01770     if (m_SupportsDatabaseMasking) {
01771         arg_desc.AddOptionalKey(kArgDbSoftMask, 
01772                 "filtering_algorithm",
01773                 "Filtering algorithm ID to apply to the BLAST database as soft "
01774                 "masking",
01775                 CArgDescriptions::eString);
01776         arg_desc.SetDependency(kArgDbSoftMask, CArgDescriptions::eExcludes, 
01777                            kArgDbHardMask);
01778 
01779         arg_desc.AddOptionalKey(kArgDbHardMask, 
01780                 "filtering_algorithm",
01781                 "Filtering algorithm ID to apply to the BLAST database as hard "
01782                 "masking",
01783                 CArgDescriptions::eString);
01784     }
01785 #endif
01786 
01787     // There is no RPS-BLAST 2 sequences
01788     if ( !m_IsRpsBlast ) {
01789         arg_desc.SetCurrentGroup("BLAST-2-Sequences options");
01790         // subject sequence input (for bl2seq)
01791         arg_desc.AddOptionalKey(kArgSubject, "subject_input_file",
01792                                 "Subject sequence(s) to search",
01793                                 CArgDescriptions::eInputFile);
01794         ITERATE(vector<string>, dbarg, database_args) {
01795             arg_desc.SetDependency(kArgSubject, CArgDescriptions::eExcludes, 
01796                                    *dbarg);
01797         }
01798 
01799         // subject location
01800         arg_desc.AddOptionalKey(kArgSubjectLocation, "range", 
01801                         "Location on the subject sequence in 1-based offsets "
01802                         "(Format: start-stop)",
01803                         CArgDescriptions::eString);
01804         ITERATE(vector<string>, dbarg, database_args) {
01805             arg_desc.SetDependency(kArgSubjectLocation, 
01806                                    CArgDescriptions::eExcludes, 
01807                                    *dbarg);
01808         }
01809         // Because Blast4-subject does not support Seq-locs, specifying a
01810         // subject range does not work for remote searches
01811         arg_desc.SetDependency(kArgSubjectLocation, 
01812                                CArgDescriptions::eExcludes, kArgRemote);
01813     }
01814 
01815     arg_desc.SetCurrentGroup("");
01816 }
01817 
01818 void
01819 CBlastDatabaseArgs::ExtractAlgorithmOptions(const CArgs& args,
01820                                             CBlastOptions& opts)
01821 {
01822     EMoleculeType mol_type = Blast_SubjectIsNucleotide(opts.GetProgramType())
01823         ? CSearchDatabase::eBlastDbIsNucleotide
01824         : CSearchDatabase::eBlastDbIsProtein;
01825     m_IsProtein = (mol_type == CSearchDatabase::eBlastDbIsProtein);
01826 
01827     if (args.Exist(kArgDb) && args[kArgDb]) {
01828 
01829         m_SearchDb.Reset(new CSearchDatabase(args[kArgDb].AsString(), 
01830                                              mol_type));
01831 
01832         if (args.Exist(kArgGiList) && args[kArgGiList]) {
01833             string fn(SeqDB_ResolveDbPath(args[kArgGiList].AsString()));
01834             m_SearchDb->SetGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn)));
01835 
01836         } else if (args.Exist(kArgNegativeGiList) && args[kArgNegativeGiList]) {
01837             string fn(SeqDB_ResolveDbPath(args[kArgNegativeGiList].AsString()));
01838             m_SearchDb->SetNegativeGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn)));
01839 
01840         } else if (args.Exist(kArgSeqIdList) && args[kArgSeqIdList]) {
01841             string fn(SeqDB_ResolveDbPath(args[kArgSeqIdList].AsString()));
01842             m_SearchDb->SetGiList(CRef<CSeqDBGiList> (new CSeqDBFileGiList(fn,
01843                              CSeqDBFileGiList::eMixList)));
01844         }
01845 
01846         if (args.Exist(kArgEntrezQuery) && args[kArgEntrezQuery])
01847             m_SearchDb->SetEntrezQueryLimitation(args[kArgEntrezQuery].AsString());
01848 
01849 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \
01850      (!defined(NCBI_COMPILER_MIPSPRO)) )
01851         if (args.Exist(kArgDbSoftMask) && args[kArgDbSoftMask]) {
01852             m_SearchDb->SetFilteringAlgorithm(args[kArgDbSoftMask].AsString(), eSoftSubjMasking);
01853         } else if (args.Exist(kArgDbHardMask) && args[kArgDbHardMask]) {
01854             m_SearchDb->SetFilteringAlgorithm(args[kArgDbHardMask].AsString(), eHardSubjMasking);
01855         }
01856 #endif
01857     } else if (args.Exist(kArgSubject) && args[kArgSubject]) {
01858 
01859         CNcbiIstream& subj_input_stream = args[kArgSubject].AsInputFile();
01860         TSeqRange subj_range;
01861         if (args.Exist(kArgSubjectLocation) && args[kArgSubjectLocation]) {
01862             subj_range = 
01863                 ParseSequenceRange(args[kArgSubjectLocation].AsString(), 
01864                             "Invalid specification of subject location");
01865         }
01866 
01867         const bool parse_deflines = args.Exist(kArgParseDeflines) 
01868         ? bool(args[kArgParseDeflines])
01869             : kDfltArgParseDeflines;
01870         const bool use_lcase_masks = args.Exist(kArgUseLCaseMasking)
01871         ? bool(args[kArgUseLCaseMasking])
01872             : kDfltArgUseLCaseMasking;
01873         CRef<blast::CBlastQueryVector> subjects;
01874         m_Scope = ReadSequencesToBlast(subj_input_stream, IsProtein(),
01875                                        subj_range, parse_deflines,
01876                                        use_lcase_masks, subjects);
01877         m_Subjects.Reset(new blast::CObjMgr_QueryFactory(*subjects));
01878 
01879     } else if (!m_IsIgBlast){
01880         // IgBlast permits use of germline database
01881         NCBI_THROW(CInputException, eInvalidInput,
01882            "Either a BLAST database or subject sequence(s) must be specified");
01883     }
01884 
01885     if (opts.GetEffectiveSearchSpace() != 0) {
01886         // no need to set any other options, as this trumps them
01887         return;
01888     }
01889 
01890     if (args[kArgDbSize]) {
01891         opts.SetDbLength(args[kArgDbSize].AsInt8());
01892     }
01893 
01894 }
01895 
01896 void
01897 CFormattingArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01898 {
01899     arg_desc.SetCurrentGroup("Formatting options");
01900 
01901     string kOutputFormatDescription = string(
01902     "alignment view options:\n"
01903     "  0 = pairwise,\n"
01904     "  1 = query-anchored showing identities,\n"
01905     "  2 = query-anchored no identities,\n"
01906     "  3 = flat query-anchored, show identities,\n"
01907     "  4 = flat query-anchored, no identities,\n"
01908     "  5 = XML Blast output,\n"
01909     "  6 = tabular,\n"
01910     "  7 = tabular with comment lines,\n"
01911     "  8 = Text ASN.1,\n"
01912     "  9 = Binary ASN.1,\n"
01913     " 10 = Comma-separated values,\n"
01914     " 11 = BLAST archive format (ASN.1) \n"
01915     " 12 = JSON Seqalign output \n\n"
01916     "Options 6, 7, and 10 can be additionally configured to produce\n"
01917     "a custom format specified by space delimited format specifiers.\n"
01918     "The supported format specifiers are:\n") +
01919         DescribeTabularOutputFormatSpecifiers() + 
01920         string("\n");
01921 
01922     int dft_outfmt = kDfltArgOutputFormat;
01923 
01924     // Igblast shows extra column of gaps
01925     if (m_IsIgBlast) {
01926         kOutputFormatDescription = string(
01927     "alignment view options:\n"
01928     "  3 = flat query-anchored, show identities,\n"
01929     "  4 = flat query-anchored, no identities,\n"
01930     "  7 = tabular with comment lines\n\n"
01931     "Options 7 can be additionally configured to produce\n"
01932     "a custom format specified by space delimited format specifiers.\n"
01933     "The supported format specifiers are:\n") +
01934         DescribeTabularOutputFormatSpecifiers(true) + 
01935         string("\n");
01936         dft_outfmt = 3;
01937     }
01938 
01939     // alignment view
01940     arg_desc.AddDefaultKey(kArgOutputFormat, "format", 
01941                            kOutputFormatDescription,
01942                            CArgDescriptions::eString, 
01943                            NStr::IntToString(dft_outfmt));
01944 
01945     // show GIs in deflines
01946     arg_desc.AddFlag(kArgShowGIs, "Show NCBI GIs in deflines?", true);
01947 
01948     // number of one-line descriptions to display
01949     arg_desc.AddOptionalKey(kArgNumDescriptions, "int_value",
01950                  "Number of database sequences to show one-line "
01951                  "descriptions for\n"
01952                  "Not applicable for outfmt > 4\n"
01953                  "Default = `"+ NStr::IntToString(m_DfltNumDescriptions)+ "'",
01954                  CArgDescriptions::eInteger);
01955     arg_desc.SetConstraint(kArgNumDescriptions, 
01956                            new CArgAllowValuesGreaterThanOrEqual(0));
01957 
01958     // number of alignments per DB sequence
01959     arg_desc.AddOptionalKey(kArgNumAlignments, "int_value",
01960                  "Number of database sequences to show alignments for\n"
01961                  "Default = `" + NStr::IntToString(m_DfltNumAlignments) + "'",
01962                  CArgDescriptions::eInteger );
01963     arg_desc.SetConstraint(kArgNumAlignments, 
01964                            new CArgAllowValuesGreaterThanOrEqual(0));
01965 
01966     arg_desc.AddOptionalKey(kArgLineLength, "line_length",
01967                             "Line length for formatting alignments\n"
01968                             "Not applicable for outfmt > 4\n"
01969                             "Default = `"+ NStr::NumericToString(align_format::kDfltLineLength) + "'",
01970                             CArgDescriptions::eInteger);
01971     arg_desc.SetConstraint(kArgLineLength,
01972                            new CArgAllowValuesGreaterThanOrEqual(1));
01973 
01974     // Produce HTML?
01975     if(!m_IsIgBlast){
01976         arg_desc.AddFlag(kArgProduceHtml, "Produce HTML output?", true);
01977     }
01978     /// Hit list size, listed here for convenience only
01979     arg_desc.SetCurrentGroup("Restrict search or results");
01980     arg_desc.AddOptionalKey(kArgMaxTargetSequences, "num_sequences",
01981                             "Maximum number of aligned sequences to keep \n"
01982                             "Not applicable for outfmt <= 4\n"
01983                             "Default = `" + NStr::IntToString(BLAST_HITLIST_SIZE) + "'",
01984                             CArgDescriptions::eInteger);
01985     arg_desc.SetConstraint(kArgMaxTargetSequences,
01986                            new CArgAllowValuesGreaterThanOrEqual(1));
01987     arg_desc.SetDependency(kArgMaxTargetSequences,
01988                            CArgDescriptions::eExcludes,
01989                            kArgNumDescriptions);
01990     arg_desc.SetDependency(kArgMaxTargetSequences,
01991                            CArgDescriptions::eExcludes,
01992                            kArgNumAlignments);
01993 
01994     arg_desc.SetCurrentGroup("");
01995 }
01996 
01997 bool 
01998 CFormattingArgs::ArchiveFormatRequested(const CArgs& args) const
01999 {
02000     EOutputFormat output_fmt;
02001     string ignore;
02002     ParseFormattingString(args, output_fmt, ignore);
02003     return (output_fmt == eArchiveFormat ? true : false);
02004 }
02005 
02006 void
02007 CFormattingArgs::ParseFormattingString(const CArgs& args, 
02008                                        EOutputFormat& fmt_type, 
02009                                        string& custom_fmt_spec) const
02010 {
02011     custom_fmt_spec.clear();
02012     if (args[kArgOutputFormat]) {
02013         string fmt_choice = 
02014             NStr::TruncateSpaces(args[kArgOutputFormat].AsString());
02015         string::size_type pos;
02016         if ( (pos = fmt_choice.find_first_of(' ')) != string::npos) {
02017             custom_fmt_spec.assign(fmt_choice, pos+1, 
02018                                    fmt_choice.size()-(pos+1));
02019             fmt_choice.erase(pos);
02020         }
02021         int val = 0;
02022         try { val = NStr::StringToInt(fmt_choice); }
02023         catch (const CStringException&) {   // probably a conversion error
02024             CNcbiOstrstream os;
02025             os << "'" << fmt_choice << "' is not a valid output format";
02026             string msg = CNcbiOstrstreamToString(os);
02027             NCBI_THROW(CInputException, eInvalidInput, msg);
02028         }
02029         if (val < 0 || val >= static_cast<int>(eEndValue)) {
02030             string msg("Formatting choice is out of range");
02031             throw std::out_of_range(msg);
02032         }
02033         if (m_IsIgBlast && (val != 3 && val != 4 && val != 7)) {
02034             string msg("Formatting choice is not valid");
02035             throw std::out_of_range(msg);
02036         }
02037         fmt_type = static_cast<EOutputFormat>(val);
02038         if ( !(fmt_type == eTabular ||
02039                fmt_type == eTabularWithComments ||
02040                fmt_type == eCommaSeparatedValues) ) {
02041                custom_fmt_spec.clear();
02042         }
02043     }
02044 }
02045 
02046 
02047 void
02048 CFormattingArgs::ExtractAlgorithmOptions(const CArgs& args,
02049                                          CBlastOptions& opt)
02050 {
02051     ParseFormattingString(args, m_OutputFormat, m_CustomOutputFormatSpec);
02052     m_ShowGis = static_cast<bool>(args[kArgShowGIs]);
02053     if(m_IsIgBlast){
02054         m_Html = false;
02055     } else {
02056         m_Html = static_cast<bool>(args[kArgProduceHtml]);
02057     }
02058     // Default hitlist size 500, value can be changed if import search strategy is used
02059     int hitlist_size = opt.GetHitlistSize();
02060 
02061     // To preserve hitlist size in import search strategy > 500,
02062     // we need to increase the  num_ descriptions and num_alignemtns
02063     if(hitlist_size > BLAST_HITLIST_SIZE )
02064     {
02065         if(!args[kArgNumDescriptions] && !args[kArgNumAlignments] &&
02066            (m_OutputFormat <= eFlatQueryAnchoredNoIdentities)) {
02067             m_NumDescriptions = hitlist_size;
02068             m_NumAlignments = hitlist_size/ 2;
02069             return;
02070         }
02071     }
02072 
02073     if(m_OutputFormat <= eFlatQueryAnchoredNoIdentities) {
02074          if (args[kArgMaxTargetSequences]) {
02075              ERR_POST(Warning << "The parameter -max_target_seqs is ignored for "
02076                         "output formats, 0,1,2,3. Use -num_descriptions "
02077                         "and -num_alignments to control output");
02078          }
02079 
02080          m_NumDescriptions = m_DfltNumDescriptions;
02081          m_NumAlignments = m_DfltNumAlignments;
02082 
02083          if (args[kArgNumDescriptions]) {
02084             m_NumDescriptions = args[kArgNumDescriptions].AsInteger();
02085          }
02086 
02087         if (args[kArgNumAlignments]) {
02088             m_NumAlignments = args[kArgNumAlignments].AsInteger();
02089         }
02090 
02091         // The If clause is for handling import_search_strategy hitlist size < 500
02092         // We want to preserve the hitlist size in iss if no formatting input is entered in cmdline
02093         // If formmating option(s) is entered than the iss hitlist size is overridden.
02094         if (args[kArgNumDescriptions] || args[kArgNumAlignments]) {
02095             hitlist_size = max(m_NumDescriptions, m_NumAlignments);
02096         }
02097 
02098         if (args[kArgLineLength]) {
02099             m_LineLength = args[kArgLineLength].AsInteger();
02100         }
02101     }
02102     else
02103     {
02104         if (args[kArgNumDescriptions]) {
02105          ERR_POST(Warning << "The parameter -num_descriptions is ignored for "
02106                              "output formats > 4 . Use -max_target_seqs "
02107                              "to control output");
02108         }
02109 
02110         if (args[kArgLineLength]) {
02111          ERR_POST(Warning << "The parameter -line_length is not applicable for "
02112                              "output formats > 4 .");
02113         }
02114 
02115         if (args[kArgMaxTargetSequences]) {
02116             hitlist_size = args[kArgMaxTargetSequences].AsInteger();
02117         }
02118         else if (args[kArgNumAlignments]) {
02119             hitlist_size = args[kArgNumAlignments].AsInteger();
02120         }
02121 
02122         m_NumDescriptions = hitlist_size;
02123         m_NumAlignments = hitlist_size;
02124     }
02125 
02126     opt.SetHitlistSize(hitlist_size);
02127 
02128     return;
02129 }
02130 
02131 void
02132 CMTArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
02133 
02134 
02135 {
02136     if(m_IsRpsBlast)
02137     {
02138         x_SetArgumentDescriptionsRpsBlast(arg_desc);
02139         return;
02140     }
02141 
02142 
02143     // number of threads
02144     arg_desc.SetCurrentGroup("Miscellaneous options");
02145 #ifdef NCBI_THREADS
02146     const int kMinValue = static_cast<int>(CThreadable::kMinNumThreads);
02147     arg_desc.AddDefaultKey(kArgNumThreads, "int_value",
02148                            "Number of threads (CPUs) to use in the BLAST search",
02149                            CArgDescriptions::eInteger, 
02150                            NStr::IntToString(kMinValue));
02151     arg_desc.SetConstraint(kArgNumThreads, 
02152                            new CArgAllowValuesGreaterThanOrEqual(kMinValue));
02153     arg_desc.SetDependency(kArgNumThreads,
02154                            CArgDescriptions::eExcludes,
02155                            kArgRemote);
02156     /*
02157     arg_desc.SetDependency(kArgNumThreads,
02158                            CArgDescriptions::eExcludes,
02159                            kArgUseIndex);
02160     */
02161 #endif
02162     arg_desc.SetCurrentGroup("");
02163 }
02164 
02165 void
02166 CMTArgs::x_SetArgumentDescriptionsRpsBlast(CArgDescriptions& arg_desc)
02167 {
02168     // number of threads
02169     arg_desc.SetCurrentGroup("Miscellaneous options");
02170 #ifdef NCBI_THREADS
02171     arg_desc.AddDefaultKey(kArgNumThreads, "int_value",
02172                            "Number of threads to use in RPS BLAST search:\n "
02173                            "0 (auto = num of databases)\n "
02174                            "1 (disable)\n max number of threads = num of databases",
02175                            CArgDescriptions::eInteger,
02176                            NStr::IntToString(kDefaultRpsNumThreads));
02177     arg_desc.SetConstraint(kArgNumThreads,
02178                            new CArgAllowValuesGreaterThanOrEqual(0));
02179     arg_desc.SetDependency(kArgNumThreads,
02180                            CArgDescriptions::eExcludes,
02181                            kArgRemote);
02182     /*
02183     arg_desc.SetDependency(kArgNumThreads,
02184                            CArgDescriptions::eExcludes,
02185                            kArgUseIndex);
02186     */
02187 #endif
02188     arg_desc.SetCurrentGroup("");
02189 }
02190 
02191 void
02192 CMTArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */)
02193 {
02194     if(m_IsRpsBlast)
02195     {
02196         x_ExtractAlgorithmOptionsRpsBlast(args);
02197         return;
02198     }
02199     if (args.Exist(kArgNumThreads) &&
02200         args[kArgNumThreads].HasValue()) {  // could be cancelled by the exclusion in CRemoteArgs
02201         m_NumThreads = args[kArgNumThreads].AsInteger();
02202 
02203         // This is temporarily ignored (per SB-635)
02204         if (args.Exist(kArgSubject) && args[kArgSubject].HasValue() &&
02205             m_NumThreads != CThreadable::kMinNumThreads) {
02206             m_NumThreads = CThreadable::kMinNumThreads;
02207             LOG_POST(Warning << "'" << kArgNumThreads << "' is currently "
02208                      << "ignored when '" << kArgSubject << "' is specified.");
02209         }
02210     }
02211 }
02212 
02213 void
02214 CMTArgs::x_ExtractAlgorithmOptionsRpsBlast(const CArgs& args)
02215 {
02216     if (args.Exist(kArgNumThreads) &&
02217         args[kArgNumThreads].HasValue())
02218     {
02219         m_NumThreads = args[kArgNumThreads].AsInteger();
02220     }
02221 }
02222 
02223 void
02224 CRemoteArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
02225 {
02226     arg_desc.SetCurrentGroup("Miscellaneous options");
02227     arg_desc.AddFlag(kArgRemote, "Execute search remotely?", true);
02228 
02229     arg_desc.SetCurrentGroup("");
02230 }
02231 
02232 void
02233 CRemoteArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */)
02234 {
02235     if (args.Exist(kArgRemote)) {
02236         m_IsRemote = static_cast<bool>(args[kArgRemote]);
02237     }
02238 }
02239 
02240 void
02241 CDebugArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
02242 {
02243 #if _BLAST_DEBUG
02244     arg_desc.SetCurrentGroup("Miscellaneous options");
02245     arg_desc.AddFlag("verbose", "Produce verbose output (show BLAST options)",
02246                      true);
02247     arg_desc.AddFlag("remote_verbose", 
02248                      "Produce verbose output for remote searches", true);
02249     arg_desc.AddFlag("use_test_remote_service", 
02250                      "Send remote requests to test servers", true);
02251     arg_desc.SetCurrentGroup("");
02252 #endif /* _BLAST_DEBUG */
02253 }
02254 
02255 void
02256 CDebugArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */)
02257 {
02258 #if _BLAST_DEBUG
02259     m_DebugOutput = static_cast<bool>(args["verbose"]);
02260     m_RmtDebugOutput = static_cast<bool>(args["remote_verbose"]);
02261     if (args["use_test_remote_service"]) {
02262         IRWRegistry& reg = CNcbiApplication::Instance()->GetConfig();
02263         reg.Set("BLAST4", DEF_CONN_REG_SECTION "_" REG_CONN_SERVICE_NAME,
02264                 "blast4_test");
02265     }
02266 #endif /* _BLAST_DEBUG */
02267 }
02268 
02269 void
02270 CHspFilteringArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
02271 {
02272     // culling limit
02273     arg_desc.SetCurrentGroup("Restrict search or results");
02274     arg_desc.AddOptionalKey(kArgCullingLimit, "int_value",
02275                      "If the query range of a hit is enveloped by that of at "
02276                      "least this many higher-scoring hits, delete the hit",
02277                      CArgDescriptions::eInteger);
02278     arg_desc.SetConstraint(kArgCullingLimit, 
02279     // best hit algorithm arguments
02280                new CArgAllowValuesGreaterThanOrEqual(kDfltArgCullingLimit));
02281 
02282     arg_desc.AddOptionalKey(kArgBestHitOverhang, "float_value", 
02283                             "Best Hit algorithm overhang value "
02284                             "(recommended value: " +
02285                             NStr::DoubleToString(kDfltArgBestHitOverhang) +
02286                             ")",
02287                             CArgDescriptions::eDouble);
02288     arg_desc.SetConstraint(kArgBestHitOverhang, 
02289                            new CArgAllowValuesBetween(kBestHit_OverhangMin, 
02290                                                       kBestHit_OverhangMax));
02291     arg_desc.SetDependency(kArgBestHitOverhang,
02292                            CArgDescriptions::eExcludes,
02293                            kArgCullingLimit);
02294 
02295     arg_desc.AddOptionalKey(kArgBestHitScoreEdge, "float_value", 
02296                             "Best Hit algorithm score edge value "
02297                             "(recommended value: " +
02298                             NStr::DoubleToString(kDfltArgBestHitScoreEdge) +
02299                             ")",
02300                             CArgDescriptions::eDouble);
02301     arg_desc.SetConstraint(kArgBestHitScoreEdge, 
02302                            new CArgAllowValuesBetween(kBestHit_ScoreEdgeMin, 
02303                                                       kBestHit_ScoreEdgeMax));
02304     arg_desc.SetDependency(kArgBestHitScoreEdge,
02305                            CArgDescriptions::eExcludes,
02306                            kArgCullingLimit);
02307     arg_desc.SetCurrentGroup("");
02308 }
02309 
02310 void
02311 CHspFilteringArgs::ExtractAlgorithmOptions(const CArgs& args, 
02312                                       CBlastOptions& opts)
02313 {
02314     if (args[kArgCullingLimit]) {
02315         opts.SetCullingLimit(args[kArgCullingLimit].AsInteger());
02316     }
02317     if (args[kArgBestHitOverhang]) {
02318         opts.SetBestHitOverhang(args[kArgBestHitOverhang].AsDouble());
02319     }
02320     if (args[kArgBestHitScoreEdge]) {
02321         opts.SetBestHitScoreEdge(args[kArgBestHitScoreEdge].AsDouble());
02322     }
02323 }
02324 
02325 void
02326 CMbIndexArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
02327 {
02328     arg_desc.SetCurrentGroup("General search options");
02329     arg_desc.AddDefaultKey( 
02330             kArgUseIndex, "boolean",
02331             "Use MegaBLAST database index",
02332             CArgDescriptions::eBoolean, NStr::BoolToString(kDfltArgUseIndex));
02333     arg_desc.AddOptionalKey(
02334             kArgIndexName, "string",
02335             "MegaBLAST database index name (deprecated; use only for old style indices)",
02336             CArgDescriptions::eString );
02337     arg_desc.SetCurrentGroup( "" );
02338 }
02339 
02340 bool
02341 CMbIndexArgs::HasBeenSet(const CArgs& args)
02342 {
02343     if ( (args.Exist(kArgUseIndex) && args[kArgUseIndex].HasValue()) ||
02344          (args.Exist(kArgIndexName) && args[kArgIndexName].HasValue()) ) {
02345         return true;
02346     }
02347     return false;
02348 }
02349 
02350 void
02351 CMbIndexArgs::ExtractAlgorithmOptions(const CArgs& args,
02352                                       CBlastOptions& opts)
02353 {
02354     // MB Index does not apply to Blast2Sequences
02355     if( args.Exist( kArgUseIndex ) &&
02356         !(args.Exist( kArgSubject ) && args[kArgSubject])) {
02357 
02358         bool use_index   = true;
02359         bool force_index = false;
02360         bool old_style_index = false;
02361 
02362         if( args[kArgUseIndex] ) {
02363             if( args[kArgUseIndex].AsBoolean() ) force_index = true;
02364             else use_index = false;
02365         }
02366 
02367         if( args.Exist( kTask ) && args[kTask] && 
02368                 args[kTask].AsString() != "megablast" ) {
02369             use_index = false;
02370         }
02371 
02372         if( use_index ) {
02373             string index_name;
02374 
02375             if( args.Exist( kArgIndexName ) && args[kArgIndexName] ) {
02376                 index_name = args[kArgIndexName].AsString();
02377                 old_style_index = true;
02378             }
02379             else if( args.Exist( kArgDb ) && args[kArgDb] ) {
02380                 index_name = args[kArgDb].AsString();
02381             }
02382             else {
02383                 NCBI_THROW(CInputException, eInvalidInput,
02384                         "Can not deduce database index name" );
02385             }
02386     
02387             opts.SetUseIndex( true, index_name, force_index, old_style_index );
02388         }
02389     }
02390 }
02391 
02392 void
02393 CStdCmdLineArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
02394 {
02395     arg_desc.SetCurrentGroup("Input query options");
02396 
02397     // query filename
02398     arg_desc.AddDefaultKey(kArgQuery, "input_file", 
02399                      "Input file name",
02400                      CArgDescriptions::eInputFile, kDfltArgQuery);
02401 
02402     arg_desc.SetCurrentGroup("General search options");
02403 
02404     // report output file
02405     arg_desc.AddDefaultKey(kArgOutput, "output_file", 
02406                    "Output file name",
02407                    CArgDescriptions::eOutputFile, "-");
02408 
02409     arg_desc.SetCurrentGroup("");
02410 }
02411 
02412 void
02413 CStdCmdLineArgs::ExtractAlgorithmOptions(const CArgs& args,
02414                                          CBlastOptions& /* opt */)
02415 {
02416     if (args.Exist(kArgQuery) && args[kArgQuery].HasValue() &&
02417         m_InputStream == NULL) {
02418         m_InputStream = &args[kArgQuery].AsInputFile();
02419     }
02420     m_OutputStream = &args[kArgOutput].AsOutputFile();
02421 }
02422 
02423 CNcbiIstream&
02424 CStdCmdLineArgs::GetInputStream() const
02425 {
02426     // programmer must ensure the ExtractAlgorithmOptions method is called
02427     // before this method is invoked
02428     if ( !m_InputStream ) {
02429         abort();
02430     }
02431     return *m_InputStream;
02432 }
02433 
02434 CNcbiOstream&
02435 CStdCmdLineArgs::GetOutputStream() const
02436 {
02437     // programmer must ensure the ExtractAlgorithmOptions method is called
02438     // before this method is invoked
02439     _ASSERT(m_OutputStream);
02440     return *m_OutputStream;
02441 }
02442 
02443 void
02444 CStdCmdLineArgs::SetInputStream(CRef<CTmpFile> input_file)
02445 {
02446     m_QueryTmpInputFile = input_file;
02447     m_InputStream = &input_file->AsInputFile(CTmpFile::eIfExists_Throw);
02448 }
02449 
02450 void
02451 CSearchStrategyArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
02452 {
02453     arg_desc.SetCurrentGroup("Search strategy options");
02454 
02455     arg_desc.AddOptionalKey(kArgInputSearchStrategy,
02456                             "filename",
02457                             "Search strategy to use", 
02458                             CArgDescriptions::eInputFile);
02459     arg_desc.AddOptionalKey(kArgOutputSearchStrategy,
02460                             "filename",
02461                             "File name to record the search strategy used", 
02462                             CArgDescriptions::eOutputFile);
02463     arg_desc.SetDependency(kArgInputSearchStrategy,
02464                            CArgDescriptions::eExcludes,
02465                            kArgOutputSearchStrategy);
02466 
02467     arg_desc.SetCurrentGroup("");
02468 }
02469 
02470 void
02471 CSearchStrategyArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
02472                                              CBlastOptions& /* options */)
02473 {
02474 }
02475 
02476 CNcbiIstream* 
02477 CSearchStrategyArgs::GetImportStream(const CArgs& args) const
02478 {
02479     CNcbiIstream* retval = NULL;
02480     if (args[kArgInputSearchStrategy].HasValue()) {
02481         retval = &args[kArgInputSearchStrategy].AsInputFile();
02482     }
02483     return retval;
02484 }
02485 
02486 CNcbiOstream* 
02487 CSearchStrategyArgs::GetExportStream(const CArgs& args) const
02488 {
02489     CNcbiOstream* retval = NULL;
02490     if (args[kArgOutputSearchStrategy].HasValue()) {
02491         retval = &args[kArgOutputSearchStrategy].AsOutputFile();
02492     }
02493     return retval;
02494 }
02495 
02496 CBlastAppArgs::CBlastAppArgs()
02497 {
02498     m_SearchStrategyArgs.Reset(new CSearchStrategyArgs);
02499     m_Args.push_back(CRef<IBlastCmdLineArgs>(&*m_SearchStrategyArgs));
02500     m_IsUngapped = false;
02501 }
02502 
02503 CArgDescriptions*
02504 CBlastAppArgs::SetCommandLine()
02505 {
02506     return SetUpCommandLineArguments(m_Args);
02507 }
02508 
02509 CRef<CBlastOptionsHandle>
02510 CBlastAppArgs::SetOptions(const CArgs& args)
02511 {
02512     // We're recovering from a saved strategy or combining
02513     // CBlastOptions/CBlastOptionsHandle with command line options (in GBench,
02514     // see GB-1116), so we need to still extract
02515     // certain options from the command line, include overriding query
02516     // and/or database
02517     if (m_OptsHandle.NotEmpty()) {
02518         CBlastOptions& opts = m_OptsHandle->SetOptions();
02519         //opts.DebugDumpText(cerr, "OptionsBeforeLoop", 1);
02520         const bool mbidxargs_set = CMbIndexArgs::HasBeenSet(args);
02521         const bool dbargs_set = CBlastDatabaseArgs::HasBeenSet(args);
02522         NON_CONST_ITERATE(TBlastCmdLineArgs, arg, m_Args) {
02523             if (dynamic_cast<CMbIndexArgs*>(&**arg)) {
02524                 if (mbidxargs_set)
02525                     (*arg)->ExtractAlgorithmOptions(args, opts);
02526             } else if (dynamic_cast<CBlastDatabaseArgs*>(&**arg)) {
02527                 if (dbargs_set)
02528                     m_BlastDbArgs->ExtractAlgorithmOptions(args, opts);
02529             } else {
02530                 (*arg)->ExtractAlgorithmOptions(args, opts);
02531             }
02532         }
02533         m_IsUngapped = !opts.GetGappedMode();
02534         try { m_OptsHandle->Validate(); }
02535         catch (const CBlastException& e) {
02536             NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
02537         }
02538         //opts.DebugDumpText(cerr, "OptionsAfterLoop", 1);
02539         return m_OptsHandle;
02540     }
02541 
02542     CBlastOptions::EAPILocality locality = 
02543         (args.Exist(kArgRemote) && args[kArgRemote]) 
02544         ? CBlastOptions::eRemote 
02545         : CBlastOptions::eLocal;
02546 
02547     // This is needed as a CRemoteBlast object and its options are instantiated
02548     // to create the search strategy
02549     if (GetExportSearchStrategyStream(args) || 
02550            m_FormattingArgs->ArchiveFormatRequested(args)) {
02551         locality = CBlastOptions::eBoth;
02552     }
02553 
02554     CRef<CBlastOptionsHandle> retval(x_CreateOptionsHandle(locality, args));
02555     CBlastOptions& opts = retval->SetOptions();
02556     NON_CONST_ITERATE(TBlastCmdLineArgs, arg, m_Args) {
02557         (*arg)->ExtractAlgorithmOptions(args, opts);
02558     }
02559 
02560     m_IsUngapped = !opts.GetGappedMode();
02561     try { retval->Validate(); }
02562     catch (const CBlastException& e) {
02563         NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
02564     }
02565     return retval;
02566 }
02567 
02568 void CBlastAppArgs::SetTask(const string& task)
02569 {
02570 #if _BLAST_DEBUG
02571     ThrowIfInvalidTask(task);
02572 #endif
02573     m_Task.assign(task);
02574 }
02575 
02576 CArgDescriptions* 
02577 SetUpCommandLineArguments(TBlastCmdLineArgs& args)
02578 {
02579     auto_ptr<CArgDescriptions> retval(new CArgDescriptions);
02580 
02581     // Create the groups so that the ordering is established
02582     retval->SetCurrentGroup("Input query options");
02583     retval->SetCurrentGroup("General search options");
02584     retval->SetCurrentGroup("BLAST database options");
02585     retval->SetCurrentGroup("BLAST-2-Sequences options");
02586     retval->SetCurrentGroup("Formatting options");
02587     retval->SetCurrentGroup("Query filtering options");
02588     retval->SetCurrentGroup("Restrict search or results");
02589     retval->SetCurrentGroup("Discontiguous MegaBLAST options");
02590     retval->SetCurrentGroup("Statistical options");
02591     retval->SetCurrentGroup("Search strategy options");
02592     retval->SetCurrentGroup("Extension options");
02593     retval->SetCurrentGroup("");
02594 
02595 
02596     NON_CONST_ITERATE(TBlastCmdLineArgs, arg, args) {
02597         (*arg)->SetArgumentDescriptions(*retval);
02598     }
02599     return retval.release();
02600 }
02601 
02602 CRef<CBlastOptionsHandle>
02603 CBlastAppArgs::x_CreateOptionsHandleWithTask
02604     (CBlastOptions::EAPILocality locality, const string& task)
02605 {
02606     _ASSERT(!task.empty());
02607     CRef<CBlastOptionsHandle> retval;
02608     SetTask(task);
02609     retval.Reset(CBlastOptionsFactory::CreateTask(GetTask(), locality));
02610     _ASSERT(retval.NotEmpty());
02611     return retval;
02612 }
02613 
02614 void
02615 CBlastAppArgs::x_IssueWarningsForIgnoredOptions(const CArgs& args)
02616 {
02617     set<string> can_override;
02618     can_override.insert(kArgQuery);
02619     can_override.insert(kArgQueryLocation);
02620     can_override.insert(kArgSubject);
02621     can_override.insert(kArgSubjectLocation);
02622     can_override.insert(kArgUseLCaseMasking);
02623     can_override.insert(kArgDb);
02624     can_override.insert(kArgDbSize);
02625     can_override.insert(kArgEntrezQuery);
02626     can_override.insert(kArgDbSoftMask);
02627     can_override.insert(kArgDbHardMask);
02628     can_override.insert(kArgUseIndex);
02629     can_override.insert(kArgIndexName);
02630     can_override.insert(kArgStrand);
02631     can_override.insert(kArgParseDeflines);
02632     can_override.insert(kArgOutput);
02633     can_override.insert(kArgOutputFormat);
02634     can_override.insert(kArgNumDescriptions);
02635     can_override.insert(kArgNumAlignments);
02636     can_override.insert(kArgMaxTargetSequences);
02637     can_override.insert(kArgRemote);
02638     can_override.insert(kArgNumThreads);
02639     can_override.insert(kArgInputSearchStrategy);
02640     can_override.insert(kArgRemote);
02641     can_override.insert("remote_verbose");
02642     can_override.insert("verbose");
02643 
02644     // this stores the arguments (and their defaults) that cannot be overriden
02645     map<string, string> has_defaults;
02646     EBlastProgramType prog = m_OptsHandle->GetOptions().GetProgramType();
02647     has_defaults[kArgCompBasedStats] =
02648             Blast_ProgramIsRpsBlast(prog) ? kDfltArgCompBasedStatsDelta:kDfltArgCompBasedStats;
02649     // FIX the line below for igblast, and add igblast options
02650     has_defaults[kArgEvalue] = NStr::DoubleToString(BLAST_EXPECT_VALUE);
02651     has_defaults[kTask] = m_Task;
02652     has_defaults[kArgOldStyleIndex] = kDfltArgOldStyleIndex;
02653     has_defaults[kArgMaxHSPsPerSubject] =
02654         NStr::IntToString(kDfltArgMaxHSPsPerSubject);
02655     if (Blast_QueryIsProtein(prog)) {
02656         if (NStr::Find(m_Task, "blastp") != NPOS || 
02657             NStr::Find(m_Task, "psiblast") != NPOS) {
02658             has_defaults[kArgSegFiltering] = kDfltArgNoFiltering;
02659         } else {
02660             has_defaults[kArgSegFiltering] = kDfltArgSegFiltering;
02661         }
02662         has_defaults[kArgLookupTableMaskingOnly] =
02663             kDfltArgLookupTableMaskingOnlyProt;
02664         has_defaults[kArgGapTrigger] =
02665             NStr::IntToString((int)BLAST_GAP_TRIGGER_PROT);
02666     } else {
02667         has_defaults[kArgDustFiltering] = kDfltArgDustFiltering;
02668         has_defaults[kArgLookupTableMaskingOnly] =
02669             kDfltArgLookupTableMaskingOnlyNucl;
02670         has_defaults[kArgGapTrigger] =
02671             NStr::IntToString((int)BLAST_GAP_TRIGGER_NUCL);
02672     }
02673     has_defaults[kArgOffDiagonalRange] =
02674         NStr::IntToString(kDfltOffDiagonalRange);
02675     has_defaults[kArgMaskLevel] = kDfltArgMaskLevel;
02676     has_defaults[kArgMaxIntronLength] =
02677         NStr::IntToString(kDfltArgMaxIntronLength);
02678     has_defaults[kArgQueryGeneticCode] = NStr::IntToString((int)BLAST_GENETIC_CODE);
02679     has_defaults[kArgDbGeneticCode] = NStr::IntToString((int)BLAST_GENETIC_CODE);
02680     // pssm engine/psiblast default options
02681     has_defaults[kArgPSIPseudocount] =
02682         NStr::IntToString(PSI_PSEUDO_COUNT_CONST);
02683     has_defaults[kArgPSIInclusionEThreshold] =
02684         NStr::DoubleToString(PSI_INCLUSION_ETHRESH);
02685     has_defaults[kArgPSINumIterations] = 
02686         NStr::IntToString(kDfltArgPSINumIterations);
02687 
02688     // get arguments, remove the supported ones and warn about those that
02689     // cannot be overridden.
02690     typedef vector< CRef<CArgValue> > TArgs;
02691     TArgs arguments = args.GetAll();
02692     ITERATE(TArgs, a, arguments) {
02693         const string& arg_name = (*a)->GetName();
02694         const string& arg_value = (*a)->AsString();
02695         // if it has a default value, ignore it if it's not different from the
02696         // default, otherwise, issue a warning
02697         if (has_defaults.find(arg_name) != has_defaults.end()) {
02698             if (has_defaults[arg_name] == arg_value) { 
02699                 continue;
02700             } else {
02701                 if (arg_name == kTask && arg_value == "megablast") {
02702                     // No need to issue warning here, as it's OK to change this
02703                     continue;
02704                 }
02705                 LOG_POST(Warning << arg_name << " cannot be overridden when "
02706                          "using a search strategy");
02707             }
02708         } 
02709         // if the argument cannot be overridden, issue a warning
02710         if (can_override.find(arg_name) == can_override.end()) {
02711             LOG_POST(Warning << arg_name << " cannot be overridden when "
02712                      "using a search strategy");
02713         }
02714     }
02715 }
02716 
02717 CRef<CBlastOptionsHandle>
02718 CBlastAppArgs::SetOptionsForSavedStrategy(const CArgs& args)
02719 {
02720     if(m_OptsHandle.Empty())
02721     {
02722         NCBI_THROW(CInputException, eInvalidInput, "Empty Blast Options Handle");
02723     }
02724 
02725     // We're recovering from a saved strategy, so we need to still extract
02726     // certain options from the command line, include overriding query
02727     // and/or database
02728     CBlastOptions& opts = m_OptsHandle->SetOptions();
02729     // invoke ExtractAlgorithmOptions on certain argument classes, i.e.: those
02730     // that should have their arguments overriden
02731     m_QueryOptsArgs->ExtractAlgorithmOptions(args, opts);
02732     m_StdCmdLineArgs->ExtractAlgorithmOptions(args, opts);
02733     m_RemoteArgs->ExtractAlgorithmOptions(args, opts);
02734     m_DebugArgs->ExtractAlgorithmOptions(args, opts);
02735     m_FormattingArgs->ExtractAlgorithmOptions(args, opts);
02736     m_MTArgs->ExtractAlgorithmOptions(args, opts);
02737     if (CBlastDatabaseArgs::HasBeenSet(args)) {
02738           m_BlastDbArgs->ExtractAlgorithmOptions(args, opts);
02739     }
02740     if (CMbIndexArgs::HasBeenSet(args)) {
02741         NON_CONST_ITERATE(TBlastCmdLineArgs, arg, m_Args) {
02742             if (dynamic_cast<CMbIndexArgs*>(arg->GetPointer()) != NULL) {
02743                 (*arg)->ExtractAlgorithmOptions(args, opts);
02744             }
02745         }
02746     }
02747     m_IsUngapped = !opts.GetGappedMode();
02748     x_IssueWarningsForIgnoredOptions(args);
02749     try { m_OptsHandle->Validate(); }
02750     catch (const CBlastException& e) {
02751         NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
02752     }
02753     return m_OptsHandle;
02754 }
02755 
02756 END_SCOPE(blast)
02757 END_NCBI_SCOPE
Modified on Wed Dec 24 12:00:27 2014 by modify_doxy.py rev. 426318