NCBI C++ ToolKit
asnval.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /*  $Id: asnval.cpp 65573 2014-12-12 15:20:23Z gotvyans $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Author:  Jonathan Kans, Clifford Clausen, Aaron Ucko
00027  *
00028  * File Description:
00029  *   validator
00030  *
00031  */
00032 
00033 #include <ncbi_pch.hpp>
00034 #include <corelib/ncbistd.hpp>
00035 #include <corelib/ncbistre.hpp>
00036 #include <corelib/ncbiapp.hpp>
00037 #include <corelib/ncbienv.hpp>
00038 #include <corelib/ncbiargs.hpp>
00039 
00040 #include <serial/serial.hpp>
00041 #include <serial/objistr.hpp>
00042 #include <serial/objectio.hpp>
00043 
00044 #include <connect/ncbi_core_cxx.hpp>
00045 #include <connect/ncbi_util.h>
00046 
00047 // Objects includes
00048 #include <objects/seq/Bioseq.hpp>
00049 #include <objects/seqloc/Seq_id.hpp>
00050 #include <objects/seqloc/Seq_loc.hpp>
00051 #include <objects/seqloc/Seq_interval.hpp>
00052 #include <objects/seq/Seq_inst.hpp>
00053 #include <objects/seq/Pubdesc.hpp>
00054 #include <objects/submit/Seq_submit.hpp>
00055 #include <objects/seqset/Seq_entry.hpp>
00056 #include <objects/seqfeat/BioSource.hpp>
00057 #include <objtools/validator/validator.hpp>
00058 #include <objtools/validator/valid_cmdargs.hpp>
00059 #include <objtools/cleanup/cleanup.hpp>
00060 
00061 #include <objects/seqset/Bioseq_set.hpp>
00062 
00063 // Object Manager includes
00064 #include <objmgr/object_manager.hpp>
00065 #include <objmgr/scope.hpp>
00066 #include <objmgr/seq_vector.hpp>
00067 #include <objmgr/seq_descr_ci.hpp>
00068 #include <objmgr/feat_ci.hpp>
00069 #include <objmgr/align_ci.hpp>
00070 #include <objmgr/graph_ci.hpp>
00071 #include <objmgr/seq_annot_ci.hpp>
00072 #include <objmgr/bioseq_ci.hpp>
00073 #include <objtools/data_loaders/genbank/gbloader.hpp>
00074 
00075 #include <serial/objostrxml.hpp>
00076 #include <misc/xmlwrapp/xmlwrapp.hpp>
00077 #include <util/compress/stream_util.hpp>
00078 #include <util/format_guess.hpp>
00079 
00080 #include <common/test_assert.h>  /* This header must go last */
00081 
00082 
00083 using namespace ncbi;
00084 using namespace objects;
00085 using namespace validator;
00086 
// Version string of the asnval application.
const char* ASNVAL_APP_VER = "10.1";
00088 
00089 #define USE_XMLWRAPP_LIBS
00090 
00091 /////////////////////////////////////////////////////////////////////////////
00092 //
00093 //  Demo application
00094 //
00095 
00096 class CValXMLStream;
00097 
00098 class CAsnvalApp : public CNcbiApplication, CReadClassMemberHook
00099 {
00100 public:
00101     CAsnvalApp(void);
00102 
00103     virtual void Init(void);
00104     virtual int  Run (void);
00105 
00106     // CReadClassMemberHook override
00107     void ReadClassMember(CObjectIStream& in,
00108         const CObjectInfo::CMemberIterator& member);
00109 
00110 private:
00111 
00112     void Setup(const CArgs& args);
00113 
00114     auto_ptr<CObjectIStream> OpenFile(const CArgs& args);
00115     auto_ptr<CObjectIStream> OpenFile(const string& fname);
00116 
00117     CConstRef<CValidError> ProcessSeqEntry(CSeq_entry& se);
00118     CConstRef<CValidError> ProcessSeqEntry(void);
00119     CConstRef<CValidError> ProcessSeqSubmit(void);
00120     CConstRef<CValidError> ProcessSeqAnnot(void);
00121     CConstRef<CValidError> ProcessSeqFeat(void);
00122     CConstRef<CValidError> ProcessBioSource(void);
00123     CConstRef<CValidError> ProcessPubdesc(void);
00124     CConstRef<CValidError> ProcessBioseqset(void);
00125     CConstRef<CValidError> ProcessBioseq(void);
00126 
00127     CConstRef<CValidError> ValidateInput (void);
00128     void ValidateOneDirectory(string dir_name, bool recurse);
00129     void ValidateOneFile(string fname);
00130     void ProcessReleaseFile(const CArgs& args);
00131 
00132     void ConstructOutputStreams();
00133     void DestroyOutputStreams();
00134 
00135     CRef<CSeq_entry> ReadSeqEntry(void);
00136     CRef<CSeq_feat> ReadSeqFeat(void);
00137     CRef<CBioSource> ReadBioSource(void);
00138     CRef<CPubdesc> ReadPubdesc(void);
00139 
00140     void ReportReadFailure(void);
00141 
00142     CRef<CScope> BuildScope(void);
00143 
00144     void PrintValidError(CConstRef<CValidError> errors, 
00145         const CArgs& args);
00146 
00147     enum EVerbosity {
00148         eVerbosity_Normal = 1,
00149         eVerbosity_Spaced = 2,
00150         eVerbosity_Tabbed = 3,
00151         eVerbosity_XML = 4,
00152         eVerbosity_min = 1, eVerbosity_max = 4
00153     };
00154 
00155     void PrintValidErrItem(const CValidErrItem& item);
00156 
00157     CRef<CObjectManager> m_ObjMgr;
00158     auto_ptr<CObjectIStream> m_In;
00159     unsigned int m_Options;
00160     bool m_Continue;
00161     bool m_OnlyAnnots;
00162     time_t m_Longest;
00163     string m_CurrentId;
00164     string m_LongestId;
00165     size_t m_NumFiles;
00166 
00167     size_t m_Level;
00168     size_t m_Reported;
00169     EDiagSev m_ReportLevel;
00170 
00171     bool m_DoCleanup;
00172     CCleanup m_Cleanup;
00173 
00174     EDiagSev m_LowCutoff;
00175     EDiagSev m_HighCutoff;
00176 
00177     EVerbosity m_verbosity;
00178     string     m_obj_type;
00179 
00180     CNcbiOstream* m_ValidErrorStream;
00181     CNcbiOstream* m_LogStream;
00182 #ifdef USE_XMLWRAPP_LIBS
00183     auto_ptr<CValXMLStream> m_ostr_xml;
00184 #endif
00185 };
00186 
00187 class CValXMLStream: public CObjectOStreamXml
00188 {
00189 public:
00190     CValXMLStream(CNcbiOstream& out, bool deleteOut): CObjectOStreamXml(out, deleteOut){};
00191     void Print(const CValidErrItem& item);
00192 };
00193 
00194 
00195 CAsnvalApp::CAsnvalApp(void) :
00196     m_ObjMgr(0), m_In(0), m_Options(0), m_Continue(false), m_OnlyAnnots(false),
00197     m_Longest(0), m_CurrentId(""), m_LongestId(""), m_NumFiles(0),
00198     m_Level(0), m_Reported(0), m_verbosity(eVerbosity_min),
00199     m_ValidErrorStream(0), m_LogStream(0)
00200 {
00201 }
00202 
00203 
00204 void CAsnvalApp::Init(void)
00205 {
00206     // Prepare command line descriptions
00207 
00208     // Create
00209     auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
00210 
00211     arg_desc->AddOptionalKey
00212         ("p", "Directory", "Path to ASN.1 Files",
00213         CArgDescriptions::eInputFile);
00214     arg_desc->AddOptionalKey
00215         ("i", "InFile", "Single Input File",
00216         CArgDescriptions::eInputFile);
00217     arg_desc->AddOptionalKey(
00218         "o", "OutFile", "Single Output File",
00219         CArgDescriptions::eOutputFile);
00220     arg_desc->AddOptionalKey(
00221         "f", "Filter", "Substring Filter",
00222         CArgDescriptions::eOutputFile);
00223     arg_desc->AddDefaultKey
00224         ("x", "String", "File Selection Substring", CArgDescriptions::eString, ".ent");
00225     arg_desc->AddFlag("u", "Recurse");
00226     arg_desc->AddDefaultKey(
00227         "R", "SevCount", "Severity for Error in Return Code",
00228         CArgDescriptions::eInteger, "4");
00229     arg_desc->AddDefaultKey(
00230         "Q", "SevLevel", "Lowest Severity for Error to Show",
00231         CArgDescriptions::eInteger, "3");
00232     arg_desc->AddDefaultKey(
00233         "P", "SevLevel", "Highest Severity for Error to Show",
00234         CArgDescriptions::eInteger, "4");
00235     CArgAllow* constraint = new CArgAllow_Integers(eDiagSevMin, eDiagSevMax);
00236     arg_desc->SetConstraint("Q", constraint);
00237     arg_desc->SetConstraint("P", constraint);
00238     arg_desc->SetConstraint("R", constraint);
00239     arg_desc->AddOptionalKey(
00240         "E", "String", "Only Error Code to Show",
00241         CArgDescriptions::eString);
00242 
00243     arg_desc->AddDefaultKey("a", "a", 
00244                             "ASN.1 Type (a Automatic, z Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, t Batch Bioseq-set, u Batch Seq-submit",
00245                             CArgDescriptions::eString,
00246                             "a");
00247 
00248     arg_desc->AddFlag("b", "Input is in binary format");
00249     arg_desc->AddFlag("c", "Batch File is Compressed");
00250 
00251     CValidatorArgUtil::SetupArgDescriptions(arg_desc.get());
00252     arg_desc->AddFlag("annot", "Verify Seq-annots only");
00253 
00254     arg_desc->AddOptionalKey(
00255         "L", "OutFile", "Log File",
00256         CArgDescriptions::eOutputFile);
00257 
00258     arg_desc->AddDefaultKey("v", "Verbosity", "Verbosity", CArgDescriptions::eInteger, "1");
00259     CArgAllow* v_constraint = new CArgAllow_Integers(eVerbosity_min, eVerbosity_max);
00260     arg_desc->SetConstraint("v", v_constraint);
00261 
00262     arg_desc->AddFlag("cleanup", "Perform BasicCleanup before validating (to match C Toolkit)");
00263 
00264     // Program description
00265     string prog_description = "ASN Validator\n";
00266     arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
00267         prog_description, false);
00268 
00269     // Pass argument descriptions to the application
00270     SetupArgDescriptions(arg_desc.release());
00271 
00272 }
00273 
00274 
00275 CConstRef<CValidError> CAsnvalApp::ValidateInput (void)
00276 {
00277     // Process file based on its content
00278     // Unless otherwise specifien we assume the file in hand is
00279     // a Seq-entry ASN.1 file, other option are a Seq-submit or NCBI
00280     // Release file (batch processing) where we process each Seq-entry
00281     // at a time.
00282     CConstRef<CValidError> eval;
00283     // ASN.1 Type (a Automatic, z Any, e Seq-entry, b Bioseq, s Bioseq-set, m Seq-submit, t Batch Bioseq-set, u Batch Seq-submit",
00284     string header = m_In->ReadFileHeader();
00285     if (header.empty() && !m_obj_type.empty())
00286     {
00287         switch (m_obj_type[0])
00288         {
00289         case 'e':
00290             header = "Seq-entry";
00291             break;
00292         case 'm':
00293             header = "Seq-submit";
00294             break;
00295         case 's':
00296             header = "Bioseq-set";
00297             break;
00298         case 'b':
00299             header = "Bioseq";
00300             break;
00301         }
00302     }
00303 
00304     if (header == "Seq-submit" ) {  // Seq-submit
00305         eval = ProcessSeqSubmit();
00306     } else if ( header == "Seq-entry" ) {           // Seq-entry
00307         eval = ProcessSeqEntry();
00308     } else if ( header == "Seq-annot" ) {           // Seq-annot
00309         eval = ProcessSeqAnnot();
00310     } else if (header == "Seq-feat" ) {             // Seq-feat
00311         eval = ProcessSeqFeat();
00312     } else if (header == "BioSource" ) {            // BioSource
00313         eval = ProcessBioSource();
00314     } else if (header == "Pubdesc" ) {              // Pubdesc
00315         eval = ProcessPubdesc();
00316     } else if (header == "Bioseq-set" ) {           // Bioseq-set
00317         eval = ProcessBioseqset();
00318     } else if (header == "Bioseq" ) {               // Bioseq
00319         eval = ProcessBioseq();
00320     } else {
00321         NCBI_THROW(CException, eUnknown, "Unhandled type " + header);
00322     }
00323 
00324     return eval;
00325 }
00326 
00327 
00328 void CAsnvalApp::ValidateOneFile(string fname)
00329 {
00330     const CArgs& args = GetArgs();
00331 
00332     if (m_LogStream) {
00333         *m_LogStream << fname << endl;
00334     }
00335     time_t start_time = time(NULL);
00336     auto_ptr<CNcbiOfstream> local_stream;
00337 
00338     try {
00339     if (!m_ValidErrorStream) {
00340         string path = fname;
00341         size_t pos = NStr::Find(path, ".", 0, string::npos, NStr::eLast);
00342         if (pos != string::npos) {
00343             path = path.substr(0, pos);
00344         }
00345         path = path + ".val";
00346 
00347         local_stream.reset(new CNcbiOfstream(path.c_str()));
00348         m_ValidErrorStream = local_stream.get();
00349 
00350         ConstructOutputStreams();
00351     }
00352     } catch (CException) {
00353     }
00354     m_In = OpenFile(fname);
00355     if (m_In.get() != 0)
00356     {
00357         try {
00358             if ( NStr::Equal(args["a"].AsString(), "t")) {          // Release file
00359                 // Open File 
00360                 ProcessReleaseFile(args);
00361             } else {
00362 
00363                 CConstRef<CValidError> eval = ValidateInput ();
00364 
00365                 if ( eval ) {
00366                     PrintValidError(eval, args);
00367                 }
00368 
00369             }
00370         } catch (CException &e) {
00371             // Also log to XML?
00372             ERR_POST(e);
00373             ++m_Reported;
00374         }
00375     }
00376     time_t stop_time = time(NULL);
00377     time_t elapsed = stop_time - start_time;
00378     if (elapsed > m_Longest) {
00379         m_Longest = elapsed;
00380         m_LongestId = m_CurrentId;
00381     }
00382     m_NumFiles++;
00383     DestroyOutputStreams();
00384     m_In.reset();
00385 }
00386 
00387 
00388 void CAsnvalApp::ValidateOneDirectory(string dir_name, bool recurse)
00389 {
00390     const CArgs& args = GetArgs();
00391 
00392     CDir dir(dir_name);
00393 
00394     string suffix = ".ent";
00395     if (args["x"]) {
00396         suffix = args["x"].AsString();
00397     }
00398     string mask = "*" + suffix;
00399 
00400     CDir::TEntries files (dir.GetEntries(mask, CDir::eFile));
00401     ITERATE(CDir::TEntries, ii, files) {
00402         string fname = (*ii)->GetName();
00403         if ((*ii)->IsFile() &&
00404             (!args["f"] || NStr::Find (fname, args["f"].AsString()) != string::npos)) {
00405             string fname = CDirEntry::MakePath(dir_name, (*ii)->GetName());
00406             ValidateOneFile (fname);
00407         }
00408     }
00409     if (recurse) {
00410         CDir::TEntries subdirs (dir.GetEntries("", CDir::eDir));
00411         ITERATE(CDir::TEntries, ii, subdirs) {
00412             string subdir = (*ii)->GetName();
00413             if ((*ii)->IsDir() && !NStr::Equal(subdir, ".") && !NStr::Equal(subdir, "..")) {
00414                 string subname = CDirEntry::MakePath(dir_name, (*ii)->GetName());
00415                 ValidateOneDirectory (subname, recurse);
00416             }
00417         }
00418     }
00419 }
00420 
00421 
00422 int CAsnvalApp::Run(void)
00423 {
00424     const CArgs& args = GetArgs();
00425     Setup(args);
00426 
00427     time_t start_time = time(NULL);
00428 
00429     if (args["o"]) {
00430         m_ValidErrorStream = &(args["o"].AsOutputFile());
00431     }
00432             
00433     m_LogStream = args["L"] ? &(args["L"].AsOutputFile()) : &NcbiCout;
00434 
00435     // note - the C Toolkit uses 0 for SEV_NONE, but the C++ Toolkit uses 0 for SEV_INFO
00436     // adjust here to make the inputs to asnvalidate match asnval expectations
00437     m_ReportLevel = static_cast<EDiagSev>(args["R"].AsInteger() - 1);
00438     m_LowCutoff = static_cast<EDiagSev>(args["Q"].AsInteger() - 1);
00439     m_HighCutoff = static_cast<EDiagSev>(args["P"].AsInteger() - 1);
00440 
00441     m_DoCleanup = args["cleanup"] && args["cleanup"].AsBoolean();
00442     m_verbosity = static_cast<EVerbosity>(args["v"].AsInteger());
00443 
00444     // Process file based on its content
00445     // Unless otherwise specifien we assume the file in hand is
00446     // a Seq-entry ASN.1 file, other option are a Seq-submit or NCBI
00447     // Release file (batch processing) where we process each Seq-entry
00448     // at a time.
00449     m_Reported = 0;
00450 
00451     m_obj_type = args["a"].AsString();
00452 
00453     if (args["b"] && m_obj_type == "a")
00454     {
00455         NCBI_THROW(CException, eUnknown, "Specific argument -a must be used along with -b flags" );
00456     }
00457 
00458     bool execption_caught = false;
00459     try {
00460         ConstructOutputStreams();
00461 
00462         if ( args["p"] ) {
00463             ValidateOneDirectory (args["p"].AsString(), args["u"]);
00464         } else {
00465             if (args["i"]) {
00466                 ValidateOneFile (args["i"].AsString());
00467             }
00468         }
00469     } catch (CException& e) {
00470         ERR_POST(Error << e);
00471         execption_caught = true;
00472     }
00473 
00474     time_t stop_time = time(NULL);
00475     if (m_LogStream) {
00476         *m_LogStream << "Finished in " << stop_time - start_time << " seconds" << endl;
00477         *m_LogStream << "Longest processing time " << m_Longest << " seconds on " << m_LongestId << endl;
00478         *m_LogStream << "Total number of records " << m_NumFiles << endl;
00479     }
00480 
00481     DestroyOutputStreams();
00482 
00483     if (m_Reported > 0  ||  execption_caught) {
00484         return 1;
00485     } else {
00486         return 0;
00487     }
00488 }
00489 
00490 
00491 CRef<CScope> CAsnvalApp::BuildScope (void)
00492 {
00493     CRef<CScope> scope(new CScope (*m_ObjMgr));
00494     scope->AddDefaults();
00495 
00496     return scope;
00497 }
00498 
00499 
00500 void CAsnvalApp::ReadClassMember
00501 (CObjectIStream& in,
00502  const CObjectInfo::CMemberIterator& member)
00503 {
00504     m_Level++;
00505 
00506     if ( m_Level == 1 ) {
00507         size_t n = 0;
00508         // Read each element separately to a local TSeqEntry,
00509         // process it somehow, and... not store it in the container.
00510         for ( CIStreamContainerIterator i(in, member); i; ++i ) {
00511             try {
00512                 // Get seq-entry to validate
00513                 CRef<CSeq_entry> se(new CSeq_entry);
00514                 i >> *se;
00515 
00516                 // Validate Seq-entry
00517                 CValidator validator(*m_ObjMgr);
00518                 CRef<CScope> scope = BuildScope();
00519                 CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(*se);
00520 
00521                 if (m_DoCleanup) {
00522                     m_Cleanup.SetScope (scope);
00523                     m_Cleanup.BasicCleanup (*se);
00524                 }
00525 
00526                 if ( m_OnlyAnnots ) {
00527                     for (CSeq_annot_CI ni(seh); ni; ++ni) {
00528                         const CSeq_annot_Handle& sah = *ni;
00529                         CConstRef<CValidError> eval = validator.Validate(sah, m_Options);
00530                         if ( eval ) {
00531                             PrintValidError(eval, GetArgs());
00532                         }
00533                     }
00534                 } else {
00535                     // CConstRef<CValidError> eval = validator.Validate(*se, &scope, m_Options);
00536                     CStopWatch sw(CStopWatch::eStart);
00537                     CConstRef<CValidError> eval = validator.Validate(seh, m_Options);
00538                     //if (m_ValidErrorStream) {
00539                     //    *m_ValidErrorStream << "Elapsed = " << sw.Elapsed() << endl;
00540                     //}
00541                     if ( eval ) {
00542                         PrintValidError(eval, GetArgs());
00543                     }
00544                 }
00545                 scope->RemoveTopLevelSeqEntry(seh);
00546                 scope->ResetHistory();
00547                 n++;
00548             } catch (exception&) {
00549                 if ( !m_Continue ) {
00550                     throw;
00551                 }
00552                 // should we issue some sort of warning?
00553             }
00554         }
00555     } else {
00556         in.ReadClassMember(member);
00557     }
00558 
00559     m_Level--;
00560 }
00561 
00562 
00563 void CAsnvalApp::ProcessReleaseFile
00564 (const CArgs& args)
00565 {
00566     CRef<CBioseq_set> seqset(new CBioseq_set);
00567 
00568     // Register the Seq-entry hook
00569     CObjectTypeInfo set_type = CType<CBioseq_set>();
00570     set_type.FindMember("seq-set").SetLocalReadHook(*m_In, this);
00571 
00572     // Read the CBioseq_set, it will call the hook object each time we 
00573     // encounter a Seq-entry
00574     *m_In >> *seqset;
00575 }
00576 
00577 void CAsnvalApp::ReportReadFailure(void)
00578 {
00579     CNcbiOstream& os = *m_ValidErrorStream;
00580 
00581     if (m_verbosity == eVerbosity_XML) {
00582 
00583         os << "  <message severity=\"REJECT\" code=\"GENERIC_InvalidAsn\">Unable to read invalid ASN.1</message>";
00584 
00585         return;
00586     }
00587 
00588     os << "REJECT: valid [GENERIC.InvalidAsn] Unable to read invalid ASN.1" << endl;
00589 }
00590 
00591 
00592 CRef<CSeq_entry> CAsnvalApp::ReadSeqEntry(void)
00593 {
00594     CRef<CSeq_entry> se(new CSeq_entry);
00595     m_In->Read(ObjectInfo(*se), CObjectIStream::eNoFileHeader);
00596 
00597     return se;
00598 }
00599 
00600 CConstRef<CValidError> CAsnvalApp::ProcessBioseq(void)
00601 {
00602     // Get seq-entry to validate
00603     CRef<CSeq_entry> se(new CSeq_entry);
00604     CBioseq& bioseq = se->SetSeq();
00605 
00606     m_In->Read(ObjectInfo(bioseq), CObjectIStream::eNoFileHeader);
00607 
00608     // Validate Seq-entry
00609     return ProcessSeqEntry(*se);
00610 }
00611 
00612 CConstRef<CValidError> CAsnvalApp::ProcessBioseqset(void)
00613 {
00614     // Get seq-entry to validate
00615     CRef<CSeq_entry> se(new CSeq_entry);
00616     CBioseq_set& bioseqset = se->SetSet();
00617 
00618     m_In->Read(ObjectInfo(bioseqset), CObjectIStream::eNoFileHeader);
00619     // Validate Seq-entry
00620     return ProcessSeqEntry(*se);
00621 }
00622 
00623 
00624 CConstRef<CValidError> CAsnvalApp::ProcessSeqEntry(void)
00625 {
00626     // Get seq-entry to validate
00627     CRef<CSeq_entry> se(new CSeq_entry);
00628 
00629     try {
00630         m_In->Read(ObjectInfo(*se), CObjectIStream::eNoFileHeader);    
00631     }
00632     catch (const CException& e) {
00633         ERR_POST(Error << e);
00634         ReportReadFailure();
00635         CRef<CValidError> errors(new CValidError());
00636         return errors;
00637     }
00638 
00639     try
00640     {
00641         return ProcessSeqEntry(*se);
00642     }
00643     catch (const CObjMgrException& om_ex)
00644     {        
00645         if (om_ex.GetErrCode() == CObjMgrException::eAddDataError)
00646           se->ReassignConflictingIds();
00647     }
00648     // try again
00649     return ProcessSeqEntry(*se);
00650 }
00651 
00652 CConstRef<CValidError> CAsnvalApp::ProcessSeqEntry(CSeq_entry& se)
00653 {
00654     // Validate Seq-entry
00655     CValidator validator(*m_ObjMgr);
00656     CRef<CScope> scope = BuildScope();
00657     if (m_DoCleanup) {        
00658         m_Cleanup.SetScope (scope);
00659         m_Cleanup.BasicCleanup (se);
00660     }
00661     CSeq_entry_Handle seh = scope->AddTopLevelSeqEntry(se);
00662     if (m_LogStream) {
00663         CBioseq_CI bi(seh);
00664         if (bi) {
00665             m_CurrentId = "";
00666             bi->GetId().front().GetSeqId()->GetLabel(&m_CurrentId);
00667             *m_LogStream << m_CurrentId << endl;
00668         }
00669     }
00670 
00671     if ( m_OnlyAnnots ) {
00672         for (CSeq_annot_CI ni(seh); ni; ++ni) {
00673             const CSeq_annot_Handle& sah = *ni;
00674             CConstRef<CValidError> eval = validator.Validate(sah, m_Options);
00675             if ( eval ) {
00676                 PrintValidError(eval, GetArgs());
00677             }
00678         }
00679         return CConstRef<CValidError>();
00680     }
00681     return validator.Validate(se, scope, m_Options);
00682 }
00683 
00684 
00685 CRef<CSeq_feat> CAsnvalApp::ReadSeqFeat(void)
00686 {
00687     CRef<CSeq_feat> feat(new CSeq_feat);
00688     m_In->Read(ObjectInfo(*feat), CObjectIStream::eNoFileHeader);
00689 
00690     return feat;
00691 }
00692 
00693 
00694 CConstRef<CValidError> CAsnvalApp::ProcessSeqFeat(void)
00695 {
00696     CRef<CSeq_feat> feat(ReadSeqFeat());
00697 
00698     CRef<CScope> scope = BuildScope();
00699     if (m_DoCleanup) {
00700         m_Cleanup.SetScope (scope);
00701         m_Cleanup.BasicCleanup (*feat);
00702     }
00703 
00704     CValidator validator(*m_ObjMgr);
00705     return validator.Validate(*feat, scope, m_Options);
00706 }
00707 
00708 
00709 CRef<CBioSource> CAsnvalApp::ReadBioSource(void)
00710 {
00711     CRef<CBioSource> src(new CBioSource);
00712     m_In->Read(ObjectInfo(*src), CObjectIStream::eNoFileHeader);
00713 
00714     return src;
00715 }
00716 
00717 
00718 CConstRef<CValidError> CAsnvalApp::ProcessBioSource(void)
00719 {
00720     CRef<CBioSource> src(ReadBioSource());
00721 
00722     CValidator validator(*m_ObjMgr);
00723     CRef<CScope> scope = BuildScope();
00724     return validator.Validate(*src, scope, m_Options);
00725 }
00726 
00727 
00728 CRef<CPubdesc> CAsnvalApp::ReadPubdesc(void)
00729 {
00730     CRef<CPubdesc> pd(new CPubdesc());
00731     m_In->Read(ObjectInfo(*pd), CObjectIStream::eNoFileHeader);
00732 
00733     return pd;
00734 }
00735 
00736 
00737 CConstRef<CValidError> CAsnvalApp::ProcessPubdesc(void)
00738 {
00739     CRef<CPubdesc> pd(ReadPubdesc());
00740 
00741     CValidator validator(*m_ObjMgr);
00742     CRef<CScope> scope = BuildScope();
00743     return validator.Validate(*pd, scope, m_Options);
00744 }
00745 
00746 
00747 
00748 CConstRef<CValidError> CAsnvalApp::ProcessSeqSubmit(void)
00749 {
00750     CRef<CSeq_submit> ss(new CSeq_submit);
00751 
00752     // Get seq-submit to validate
00753     try {
00754         m_In->Read(ObjectInfo(*ss), CObjectIStream::eNoFileHeader);
00755     }
00756     catch (CException& e) {
00757         ERR_POST(Error << e);
00758         ReportReadFailure();
00759         CRef<CValidError> errors(new CValidError());
00760         return errors;
00761     }
00762 
00763     // Validae Seq-submit
00764     CValidator validator(*m_ObjMgr);
00765     CRef<CScope> scope = BuildScope();
00766     if (ss->GetData().IsEntrys()) {
00767         ITERATE(CSeq_submit::TData::TEntrys, se, ss->GetData().GetEntrys()) {
00768             scope->AddTopLevelSeqEntry(**se);
00769         }
00770     }
00771     if (m_DoCleanup) {
00772         m_Cleanup.SetScope (scope);
00773         m_Cleanup.BasicCleanup (*ss);
00774     }
00775 
00776     return validator.Validate(*ss, scope, m_Options);
00777 }
00778 
00779 
00780 CConstRef<CValidError> CAsnvalApp::ProcessSeqAnnot(void)
00781 {
00782     CRef<CSeq_annot> sa(new CSeq_annot);
00783 
00784     // Get seq-annot to validate
00785     m_In->Read(ObjectInfo(*sa), CObjectIStream::eNoFileHeader);
00786 
00787     // Validae Seq-annot
00788     CValidator validator(*m_ObjMgr);
00789     CRef<CScope> scope = BuildScope();
00790     if (m_DoCleanup) {
00791         m_Cleanup.SetScope (scope);
00792         m_Cleanup.BasicCleanup (*sa);
00793     }
00794     CSeq_annot_Handle sah = scope->AddSeq_annot(*sa);
00795     return validator.Validate(sah, m_Options);
00796 }
00797 
00798 
00799 void CAsnvalApp::Setup(const CArgs& args)
00800 {
00801     // Setup application registry and logs for CONNECT library
00802     CORE_SetLOG(LOG_cxx2c());
00803     CORE_SetREG(REG_cxx2c(&GetConfig(), false));
00804     // Setup MT-safety for CONNECT library
00805     // CORE_SetLOCK(MT_LOCK_cxx2c());
00806 
00807     // Create object manager
00808     m_ObjMgr = CObjectManager::GetInstance();
00809     if ( args["r"] ) {
00810         // Create GenBank data loader and register it with the OM.
00811         // The last argument "eDefault" informs the OM that the loader must
00812         // be included in scopes during the CScope::AddDefaults() call.
00813         CGBDataLoader::RegisterInObjectManager(*m_ObjMgr);
00814     }
00815 
00816     m_OnlyAnnots = args["annot"];
00817 
00818     // Set validator options
00819     m_Options = CValidatorArgUtil::ArgsToValidatorOptions(args);
00820 }
00821 
00822 
00823 auto_ptr<CObjectIStream> CAsnvalApp::OpenFile(const CArgs& args)
00824 {
00825     // file name
00826     return OpenFile(args["i"].AsString());
00827 }
00828 
00829 
00830 auto_ptr<CObjectIStream> OpenUncompressedStream(const string& fname)
00831 {
00832     auto_ptr<CNcbiIstream> InputStream(new CNcbiIfstream (fname.c_str(), ios::binary));
00833     CCompressStream::EMethod method;
00834     
00835     CFormatGuess::EFormat format = CFormatGuess::Format(*InputStream);
00836     switch (format)
00837     {
00838     case CFormatGuess::eGZip:  method = CCompressStream::eGZipFile;  break;
00839     case CFormatGuess::eBZip2: method = CCompressStream::eBZip2;     break;
00840     case CFormatGuess::eLzo:   method = CCompressStream::eLZO;       break;
00841     default:                   method = CCompressStream::eNone;      break;
00842     }
00843     if (method != CCompressStream::eNone)
00844     {
00845         CDecompressIStream* decompress(new CDecompressIStream(*InputStream, method, CCompressStream::fDefault, eTakeOwnership));
00846         InputStream.release();
00847         InputStream.reset(decompress);
00848         format = CFormatGuess::Format(*InputStream);
00849     }
00850 
00851     auto_ptr<CObjectIStream> objectStream;
00852     switch (format)
00853     {
00854         case CFormatGuess::eBinaryASN:
00855         case CFormatGuess::eTextASN:
00856             objectStream.reset(CObjectIStream::Open(format==CFormatGuess::eBinaryASN ? eSerial_AsnBinary : eSerial_AsnText, *InputStream, eTakeOwnership));
00857             InputStream.release();
00858             break;
00859         default:
00860             break;
00861     }
00862     return objectStream;
00863 }
00864 
00865 
00866 auto_ptr<CObjectIStream> CAsnvalApp::OpenFile(const string& fname)
00867 {
00868     return OpenUncompressedStream(fname);
00869 }
00870 
00871 void CAsnvalApp::PrintValidError
00872 (CConstRef<CValidError> errors, 
00873  const CArgs& args)
00874 {
00875     if ( errors->TotalSize() == 0 ) {
00876         return;
00877     }
00878 
00879     for ( CValidError_CI vit(*errors); vit; ++vit) {
00880         if (vit->GetSeverity() >= m_ReportLevel) {
00881             ++m_Reported;
00882         }
00883         if ( vit->GetSeverity() < m_LowCutoff || vit->GetSeverity() > m_HighCutoff) {
00884             continue;
00885         }
00886         if (args["E"] && !(NStr::EqualNocase(args["E"].AsString(), vit->GetErrCode()))) {
00887             continue;
00888         }
00889         PrintValidErrItem(*vit);
00890     }
00891     m_ValidErrorStream->flush();
00892 }
00893 
00894 
00895 static string s_GetSeverityLabel (EDiagSev sev)
00896 {
00897     static const string str_sev[] = {
00898         "INFO", "WARNING", "ERROR", "REJECT", "FATAL", "MAX"
00899     };
00900     if (sev < 0 || sev > eDiagSevMax) {
00901         return "NONE";
00902     }
00903 
00904     return str_sev[sev];
00905 }
00906 
00907 
00908 void CAsnvalApp::PrintValidErrItem(const CValidErrItem& item)
00909 {
00910     CNcbiOstream& os = *m_ValidErrorStream;
00911     switch (m_verbosity) {
00912         case eVerbosity_Normal:
00913             os << s_GetSeverityLabel(item.GetSeverity()) 
00914                << ": valid [" << item.GetErrGroup() << "." << item.GetErrCode() <<"] "
00915                << item.GetMsg() << " " << item.GetObjDesc() << endl;
00916             break;
00917         case eVerbosity_Spaced:
00918             {
00919                 string spacer = "                    ";
00920                 string msg = item.GetAccnver() + spacer;
00921                 msg = msg.substr(0, 15);
00922                 msg += s_GetSeverityLabel(item.GetSeverity());
00923                 msg += spacer;
00924                 msg = msg.substr(0, 30);
00925                 msg += item.GetErrGroup() + "_" + item.GetErrCode();
00926                 os << msg << endl;
00927             }
00928             break;
00929         case eVerbosity_Tabbed:
00930             os << item.GetAccnver() << "\t"
00931                << s_GetSeverityLabel(item.GetSeverity()) << "\t"
00932                << item.GetErrGroup() << "_" << item.GetErrCode() << endl;
00933             break;
00934 #ifdef USE_XMLWRAPP_LIBS
00935         case eVerbosity_XML:
00936             {
00937                 m_ostr_xml->Print(item);
00938             }
00939 #else
00940         case eVerbosity_XML:
00941             {
00942                 string msg = NStr::XmlEncode(item.GetMsg());
00943                 if (item.IsSetFeatureId()) {
00944                     os << "  <message severity=\"" << s_GetSeverityLabel(item.GetSeverity())
00945                        << "\" seq-id=\"" << item.GetAccnver() 
00946                        << "\" feat-id=\"" << item.GetFeatureId()
00947                        << "\" code=\"" << item.GetErrGroup() << "_" << item.GetErrCode()
00948                        << "\">" << msg << "</message>" << endl;
00949                 } else {
00950                     os << "  <message severity=\"" << s_GetSeverityLabel(item.GetSeverity())
00951                        << "\" seq-id=\"" << item.GetAccnver() 
00952                        << "\" code=\"" << item.GetErrGroup() << "_" << item.GetErrCode()
00953                        << "\">" << msg << "</message>" << endl;
00954                 }
00955             }
00956 #endif
00957             break;
00958     }
00959 }
00960 
00961 void CValXMLStream::Print(const CValidErrItem& item)
00962 {
00963 #if 0
00964     TTypeInfo info = item.GetThisTypeInfo();    
00965     WriteObject(&item, info);
00966 #else
00967     m_Output.PutString("  <message severity=\"");
00968        m_Output.PutString(s_GetSeverityLabel(item.GetSeverity()));
00969     m_Output.PutString("\" seq-id=\"");
00970        WriteString(item.GetAccnver(), eStringTypeVisible);
00971 
00972     if (item.IsSetFeatureId()) {
00973        m_Output.PutString("\" feat-id=\"");
00974        WriteString(item.GetFeatureId(), eStringTypeVisible);
00975     }
00976 
00977     m_Output.PutString("\" code=\"");
00978         WriteString(item.GetErrGroup(), eStringTypeVisible);
00979         m_Output.PutString("_");
00980         WriteString(item.GetErrCode(), eStringTypeVisible);
00981     m_Output.PutString("\">");
00982 
00983     WriteString(item.GetMsg(), eStringTypeVisible);
00984 
00985     m_Output.PutString("</message>");
00986     m_Output.PutEol();
00987 #endif
00988 }
00989 
00990 void CAsnvalApp::ConstructOutputStreams()
00991 {
00992     if (m_ValidErrorStream && m_verbosity == eVerbosity_XML)
00993     {
00994 #ifdef USE_XMLWRAPP_LIBS
00995         m_ostr_xml.reset(new CValXMLStream(*m_ValidErrorStream, false));
00996         m_ostr_xml->SetEncoding(eEncoding_UTF8);
00997         m_ostr_xml->SetReferenceDTD(false);
00998         m_ostr_xml->SetEnforcedStdXml(true);
00999         m_ostr_xml->WriteFileHeader(CValidErrItem::GetTypeInfo());
01000         m_ostr_xml->SetUseIndentation(true);
01001         m_ostr_xml->Flush();
01002 
01003         *m_ValidErrorStream << endl << "<asnvalidate version=\"" << ASNVAL_APP_VER << "\" severity_cutoff=\""
01004         << s_GetSeverityLabel(m_LowCutoff) << "\">" << endl;
01005         m_ValidErrorStream->flush();
01006 #else
01007         *m_ValidErrorStream << "<asnvalidate version=\"" << ASNVAL_APP_VER << "\" severity_cutoff=\""
01008         << s_GetSeverityLabel(m_LowCutoff) << "\">" << endl;
01009 #endif
01010     }
01011 }
01012 
01013 void CAsnvalApp::DestroyOutputStreams()
01014 {
01015 #ifdef USE_XMLWRAPP_LIBS
01016     if (m_ostr_xml.get())
01017     {
01018         m_ostr_xml.reset();
01019         *m_ValidErrorStream << endl << "</asnvalidate>" << endl;
01020     }
01021 #endif
01022     m_ValidErrorStream = 0;
01023 }
01024 
01025 
01026 /////////////////////////////////////////////////////////////////////////////
01027 //  MAIN
01028 
01029 
01030 int main(int argc, const char* argv[])
01031 {
01032     return CAsnvalApp().AppMain(argc, argv);
01033 }
01034 
01035 // don't commit this
01036 void mk(const CSerialObject *obj)
01037 {
01038     if( obj ) {
01039         cerr << MSerial_AsnText << *obj << endl;
01040     } else {
01041         cerr << "(NULL)" << endl;
01042     }
01043 }
Modified on Fri Dec 19 11:42:26 2014 by modify_doxy.py rev. 426318