src/objtools/cddalignview/cav_function.cpp

Go to the documentation of this file.
00001 /*  $Id: cav_function.cpp 138728 2008-08-27 21:07:05Z thiessen $
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================
00025 *
00026 * Authors:  Paul Thiessen
00027 *
00028 * File Description:
00029 *      C interfaced function body for cddalignview as function call
00030 *
00031 * ===========================================================================
00032 */
00033 
00034 #include <ncbi_pch.hpp>
00035 #include <corelib/ncbistl.hpp>
00036 #include <corelib/ncbistre.hpp>
00037 #include <corelib/ncbi_limits.h>
00038 #include <corelib/ncbidiag.hpp>
00039 
00040 #include <list>
00041 #include <memory>
00042 
00043 #include <objects/cdd/Cdd.hpp>
00044 #include <objects/ncbimime/Ncbi_mime_asn1.hpp>
00045 #include <objects/ncbimime/Biostruc_seqs.hpp>
00046 #include <objects/ncbimime/Biostruc_align.hpp>
00047 #include <objects/ncbimime/Biostruc_align_seq.hpp>
00048 #include <objects/ncbimime/Biostruc_seqs_aligns_cdd.hpp>
00049 #include <objects/ncbimime/Bundle_seqs_aligns.hpp>
00050 
00051 #include <objtools/cddalignview/cddalignview.h>
00052 #include <objtools/cddalignview/cav_seqset.hpp>
00053 #include <objtools/cddalignview/cav_alignset.hpp>
00054 #include <objtools/cddalignview/cav_asnio.hpp>
00055 #include <objtools/cddalignview/cav_alndisplay.hpp>
00056 #include <objtools/error_codes.hpp>
00057 
00058 
00059 #define NCBI_USE_ERRCODE_X   Objtools_CAV_Func
00060 
00061 
00062 BEGIN_NCBI_SCOPE
00063 USING_SCOPE(objects);
00064 
// Diagnostic post level selected by CAV_DisplayMultiple from its options: eDiag_Info when
// CAV_DEBUG is set, eDiag_Error otherwise, or the level already in effect when
// CAV_NO_CHANGE_DIAG is set. As a file-scope static it is zero-initialized until the first
// such call assigns it.
static EDiagSev defaultDiagPostLevel;
00066 
00067 static int LoadASNFromIstream(CNcbiIstream& asnIstream,
00068     const SeqEntryList* *sequences, const SeqAnnotList* *alignments)
00069 {
00070     *sequences = NULL;
00071     *alignments = NULL;
00072 
00073     // try to decide what ASN type this is, and if it's binary or ascii
00074     static const string
00075         asciiMimeFirstWord = "Ncbi-mime-asn1",
00076         asciiCDDFirstWord = "Cdd";
00077     bool isMime = false, isCDD = false, isBinary = true;
00078 
00079     string firstWord;
00080     asnIstream >> firstWord;
00081     if (firstWord == asciiMimeFirstWord) {
00082         isMime = true;
00083         isBinary = false;
00084     } else if (firstWord == asciiCDDFirstWord) {
00085         isCDD = true;
00086         isBinary = false;
00087     }
00088 
00089     // try to read the file as various ASN types (if it's not clear from the first ascii word).
00090     auto_ptr<SeqEntryList> newSequences(new SeqEntryList());
00091     auto_ptr<SeqAnnotList> newAlignments(new SeqAnnotList());
00092     bool readOK = false;
00093     string err;
00094 
00095     if (!isMime) {
00096 //        ERR_POST_X(1, Info << "trying to read input as " <<
00097 //            ((isBinary) ? "binary" : "ascii") << " cdd");
00098         CRef < CCdd > cdd(new CCdd);
00099         SetDiagPostLevel(eDiag_Fatal); // ignore all but Fatal errors while reading data
00100         asnIstream.seekg(0);
00101         readOK = ReadASNFromIstream(asnIstream, *cdd, isBinary, err);
00102         SetDiagPostLevel(defaultDiagPostLevel);
00103         if (readOK) {
00104             newSequences->resize(1);
00105             newSequences->front().Reset(&(cdd->SetSequences()));
00106             *newAlignments = cdd->GetSeqannot();   // copy the list
00107         }
00108     }
00109 
00110     if (!readOK) {
00111 //        ERR_POST_X(2, Info << "trying to read input as " <<
00112 //            ((isBinary) ? "binary" : "ascii") << " mime");
00113         CRef < CNcbi_mime_asn1 > mime(new CNcbi_mime_asn1);
00114         SetDiagPostLevel(eDiag_Fatal); // ignore all but Fatal errors while reading data
00115         asnIstream.seekg(0);
00116         readOK = ReadASNFromIstream(asnIstream, *mime, isBinary, err);
00117         SetDiagPostLevel(defaultDiagPostLevel);
00118         if (readOK) {
00119             // copy lists
00120             if (mime->IsStrucseqs()) {
00121                 *newSequences = mime->GetStrucseqs().GetSequences();
00122                 *newAlignments = mime->GetStrucseqs().GetSeqalign();
00123             } else if (mime->IsAlignstruc()) {
00124                 *newSequences = mime->GetAlignstruc().GetSequences();
00125                 *newAlignments = mime->GetAlignstruc().GetSeqalign();
00126             } else if (mime->IsAlignseq()) {
00127                 *newSequences = mime->GetAlignseq().GetSequences();
00128                 *newAlignments = mime->GetAlignseq().GetSeqalign();
00129             } else if (mime->IsGeneral()) {
00130                 if (mime->GetGeneral().GetSeq_align_data().IsBundle()) {
00131                     *newSequences = mime->GetGeneral().GetSeq_align_data().GetBundle().GetSequences();
00132                     *newAlignments = mime->GetGeneral().GetSeq_align_data().GetBundle().GetSeqaligns();
00133                 } else if (mime->GetGeneral().GetSeq_align_data().IsCdd()) {
00134                     newSequences->resize(1);
00135                     newSequences->front().Reset(&(mime->SetGeneral().SetSeq_align_data().SetCdd().SetSequences()));
00136                     *newAlignments = mime->GetGeneral().GetSeq_align_data().GetCdd().GetSeqannot();
00137                 }
00138             }
00139         }
00140     }
00141 
00142     if (!readOK) {
00143         ERR_POST_X(3, Error << "Input is not a recognized data type (Ncbi-mime-asn1 or Cdd) : " << err);
00144         return CAV_ERROR_BAD_ASN;
00145     }
00146     if (newSequences->size() == 0 || newAlignments->size() == 0) {
00147         ERR_POST_X(4, Error << "Cannot find sequences and alignments in the input data!");
00148         return CAV_ERROR_BAD_ASN;
00149     }
00150 
00151     *sequences = newSequences.release();
00152     *alignments = newAlignments.release();
00153     return CAV_SUCCESS;
00154 }
00155 
00156 // checks two things for each slave sequence: that all the residues of the sequence
00157 // are present in the display, and that the aligned residues are in the right place
00158 // wrt the master
00159 static bool VerifyAlignmentData(const AlignmentSet *alignmentSet, const AlignmentDisplay *display)
00160 {
00161     int alnLoc, masterLoc, slaveLoc, currentMasterLoc, currentSlaveLoc;
00162     char masterChar, slaveChar;
00163     const MasterSlaveAlignment *alignment;
00164 
00165     for (unsigned int i=0; i<alignmentSet->alignments.size(); ++i) {
00166         masterLoc = slaveLoc = -1;
00167         alignment = alignmentSet->alignments[i];
00168 
00169         for (alnLoc=0; alnLoc<(int)display->GetWidth(); ++alnLoc) {
00170 
00171             // get and check characters
00172             masterChar = display->GetCharAt(alnLoc, 0);
00173             if (masterChar == '?') {
00174                 ERR_POST_X(5, Error << "bad alignment coordinate: loc " << (alnLoc+1) << " row 1 (master)");
00175                 return false;
00176             }
00177             slaveChar = display->GetCharAt(alnLoc, 1 + i);
00178             if (slaveChar == '?') {
00179                 ERR_POST_X(6, Error << "bad alignment coordinate: loc " << (alnLoc+1) << " row " << (i+2));
00180                 return false;
00181             }
00182 
00183             // advance seqLocs, check sequence string length and composition
00184             if (!IsGap(masterChar)) {
00185                 ++masterLoc;
00186                 if (i == 0) {   // only need to check master once
00187                     if (masterLoc >= (int) alignment->master->sequenceString.size()) {
00188                         ERR_POST_X(7, Error << "master sequence too long at alnLoc " << (alnLoc+1)
00189                             << " row " << (i+2) << " masterLoc " << (masterLoc+1));
00190                         return false;
00191                     } else if (toupper((unsigned char) masterChar) != 
00192                                     toupper((unsigned char) alignment->master->sequenceString[masterLoc])) {
00193                         ERR_POST_X(8, Error << "master sequence mismatch at alnLoc " << (alnLoc+1)
00194                             << " row " << (i+2) << " masterLoc " << (masterLoc+1));
00195                         return false;
00196                     }
00197                 }
00198             }
00199             if (!IsGap(slaveChar)) {
00200                 ++slaveLoc;
00201                 if (slaveLoc >= (int) alignment->slave->sequenceString.size()) {
00202                     ERR_POST_X(9, Error << "slave sequence too long at alnLoc " << (alnLoc+1)
00203                         << " row " << (i+2) << " slaveLoc " << (slaveLoc+1));
00204                     return false;
00205                 } else if (toupper((unsigned char) slaveChar) != 
00206                                 toupper((unsigned char) alignment->slave->sequenceString[slaveLoc])) {
00207                     ERR_POST_X(10, Error << "slave sequence mismatch at alnLoc " << (alnLoc+1)
00208                         << " row " << (i+2) << " slaveLoc " << (slaveLoc+1));
00209                     return false;
00210                 }
00211             }
00212             currentMasterLoc = IsGap(masterChar) ? -1 : masterLoc;
00213             currentSlaveLoc = IsGap(slaveChar) ? -1 : slaveLoc;
00214 
00215             // check display characters, to see if they match alignment data
00216             if (IsGap(slaveChar) || IsUnaligned(slaveChar)) {
00217                 if (currentMasterLoc >= 0 && alignment->masterToSlave[currentMasterLoc] != -1) {
00218                     ERR_POST_X(11, Error << "slave should be marked aligned at alnLoc " << (alnLoc+1)
00219                         << " row " << (i+2));
00220                     return false;
00221                 }
00222             }
00223             if (IsAligned(slaveChar)) {
00224                 if (!IsAligned(masterChar)) {
00225                     ERR_POST_X(12, Error <<" slave marked aligned but master unaligned at alnLoc " << (alnLoc+1)
00226                         << " row " << (i+2));
00227                     return false;
00228                 }
00229                 if (alignment->masterToSlave[currentMasterLoc] == -1) {
00230                     ERR_POST_X(13, Error << "slave incorrectly marked aligned at alnLoc " << (alnLoc+1)
00231                         << " row " << (i+2));
00232                     return false;
00233                 }
00234                 if (alignment->masterToSlave[currentMasterLoc] != currentSlaveLoc) {
00235                     ERR_POST_X(14, Error << "wrong slave residue aligned at alnLoc " << (alnLoc+1)
00236                         << " row " << (i+2));
00237                     return false;
00238                 }
00239             }
00240 
00241             // converse: make sure alignment data is correctly reflected in display
00242             if (!IsGap(masterChar)) {
00243                 if (alignment->masterToSlave[currentMasterLoc] == -1) {
00244                     if (IsAligned(slaveChar)) {
00245                         ERR_POST_X(15, Error << "slave should be unaligned at alnLoc " << (alnLoc+1)
00246                             << " row " << (i+2));
00247                         return false;
00248                     }
00249                 } else {    // aligned master
00250                     if (!IsAligned(slaveChar)) {
00251                         ERR_POST_X(16, Error << "slave should be aligned at alnLoc " << (alnLoc+1)
00252                             << " row " << (i+2));
00253                         return false;
00254                     }
00255                     if (currentSlaveLoc != alignment->masterToSlave[currentMasterLoc]) {
00256                         ERR_POST_X(17, Error << "wrong slave residue aligned to master at alnLoc " << (alnLoc+1)
00257                             << " row " << (i+2));
00258                         return false;
00259                     }
00260                 }
00261             }
00262         }
00263 
00264         // check sequence lengths
00265         if (masterLoc != alignment->master->sequenceString.size() - 1 ||
00266             slaveLoc != alignment->slave->sequenceString.size() - 1) {
00267             ERR_POST_X(18, Error << "bad sequence lengths at row " << (i+2));
00268             return false;
00269         }
00270     }
00271     return true;
00272 }
00273 
00274 END_NCBI_SCOPE
00275 
00276 
00277 // leave the main function outside the NCBI namespace, just in case that might
00278 // cause any problems when linking it to C code...
00279 USING_NCBI_SCOPE;
00280 
00281 int CAV_DisplayMultiple(
00282     const SeqEntryList& sequences,
00283     const SeqAnnotList& alignments,
00284     unsigned int options,
00285     unsigned int paragraphWidth,
00286     double conservationThreshhold,
00287     const char *title,
00288     int nFeatures,
00289     const AlignmentFeature *alnFeatures,
00290     CNcbiOstream *outputStream,
00291     CNcbiOstream *diagnosticStream)
00292 {
00293     // make sure C++ output streams are sync'ed with C's stdio
00294     IOS_BASE::sync_with_stdio(true);
00295 
00296     // set up output streams (send all diagnostic messages to a different stream)
00297     CNcbiOstream *outStream;
00298     if (outputStream)
00299         outStream = outputStream;
00300     else
00301         outStream = &NcbiCout;
00302     if (options & CAV_NO_CHANGE_DIAG) {
00303         defaultDiagPostLevel = SetDiagPostLevel();
00304         SetDiagPostLevel(defaultDiagPostLevel);
00305     } else {
00306         if (diagnosticStream)
00307             SetDiagStream(diagnosticStream);
00308         else
00309             SetDiagStream(&NcbiCerr);
00310         if (options & CAV_DEBUG)
00311             SetDiagPostLevel(defaultDiagPostLevel = eDiag_Info);   // show all messages
00312         else
00313             SetDiagPostLevel(defaultDiagPostLevel = eDiag_Error);  // show only errors
00314     }
00315 
00316     // check option consistency
00317     if (options & CAV_CONDENSED && !(options & CAV_TEXT || options & CAV_HTML)) {
00318         ERR_POST_X(19, Error << "Cannot do condensed display except with text/HTML output");
00319         return CAV_ERROR_BAD_PARAMS;
00320     }
00321     if (options & CAV_FASTA_LOWERCASE && !(options & CAV_FASTA)) {
00322         ERR_POST_X(20, Error << "Cannot do fasta_lc option except with FASTA output");
00323         return CAV_ERROR_BAD_PARAMS;
00324     }
00325     if (options & CAV_HTML_HEADER && !(options & CAV_HTML)) {
00326         ERR_POST_X(21, Error << "Cannot do HTML header without HTML output");
00327         return CAV_ERROR_BAD_PARAMS;
00328     }
00329 
00330     // process asn data
00331     auto_ptr<SequenceSet> sequenceSet(new SequenceSet(sequences));
00332     if (!sequenceSet.get() || sequenceSet->Status() != CAV_SUCCESS) {
00333         ERR_POST_X(22, Critical << "Error processing sequence data");
00334         return sequenceSet->Status();
00335     }
00336     auto_ptr<AlignmentSet> alignmentSet(new AlignmentSet(sequenceSet.get(), alignments));
00337     if (!alignmentSet.get() || alignmentSet->Status() != CAV_SUCCESS) {
00338         ERR_POST_X(23, Critical << "Error processing alignment data");
00339         return alignmentSet->Status();
00340     }
00341 
00342     // create the alignment display structure
00343     auto_ptr<AlignmentDisplay> display(new AlignmentDisplay(sequenceSet.get(), alignmentSet.get()));
00344     if (!display.get() || display->Status() != CAV_SUCCESS) {
00345         ERR_POST_X(24, Critical << "Error creating alignment display");
00346         return display->Status();
00347     }
00348 
00349     // do verification
00350     if (options & CAV_DEBUG) {
00351         if (!VerifyAlignmentData(alignmentSet.get(), display.get())) {
00352             ERR_POST_X(25, Critical << "AlignmentDisplay failed verification");
00353             return CAV_ERROR_DISPLAY;
00354         } else {
00355             ERR_POST_X(26, Info << "AlignmentDisplay passed verification");
00356         }
00357     }
00358 
00359     // display alignment with given parameters
00360     ERR_POST_X(27, Info << "writing output...");
00361     int
00362         from = (options & CAV_LEFTTAILS) ? 0 : display->GetFirstAlignedLoc(),
00363         to = (options & CAV_RIGHTTAILS) ? display->GetWidth()-1 : display->GetLastAlignedLoc();
00364     if (options & CAV_SHOW_IDENTITY) conservationThreshhold = AlignmentDisplay::SHOW_IDENTITY;
00365     int retval = CAV_ERROR_BAD_PARAMS;
00366     if (options & CAV_TEXT || options & CAV_HTML) {
00367         if (options & CAV_CONDENSED)
00368             retval = display->DumpCondensed(*outStream, options,
00369                 from, to, paragraphWidth, conservationThreshhold, title, nFeatures, alnFeatures);
00370         else
00371             retval = display->DumpText(*outStream, options,
00372                 from, to, paragraphWidth, conservationThreshhold, title, nFeatures, alnFeatures);
00373     } else if (options & CAV_FASTA) {
00374         retval = display->DumpFASTA(from, to, paragraphWidth,
00375             ((options & CAV_FASTA_LOWERCASE) > 0), *outStream);
00376     }
00377 //    if (outStream != &NcbiCout) delete outStream;
00378     if (retval != CAV_SUCCESS) {
00379         ERR_POST_X(28, Error << "Error dumping display to output");
00380         return retval;
00381     }
00382 
00383     return CAV_SUCCESS;
00384 }
00385 
00386 int CAV_DisplayMultiple(
00387     const void *asnDataBlock,
00388     int asnSize,
00389     unsigned int options,
00390     unsigned int paragraphWidth,
00391     double conservationThreshhold,
00392     const char *title,
00393     int nFeatures,
00394     const AlignmentFeature *alnFeatures,
00395     CNcbiOstream *outputStream,
00396     CNcbiOstream *diagnosticStream)
00397 {
00398     // load input data into an input stream
00399     if (!asnDataBlock) {
00400         ERR_POST_X(29, Critical << "NULL asnDataBlock parameter");
00401         return CAV_ERROR_BAD_ASN;
00402     }
00403     CNcbiIstrstream asnIstrstream(static_cast<const char*>(asnDataBlock), asnSize);
00404 
00405     // load asn data block
00406     const SeqEntryList *seqs;
00407     const SeqAnnotList *alns;
00408     int retval = LoadASNFromIstream(asnIstrstream, &seqs, &alns);
00409     if (retval != CAV_SUCCESS) {
00410         ERR_POST_X(30, Critical << "Couldn't get sequence and alignment ASN data");
00411         return retval;
00412     }
00413 
00414     // make sure these get freed
00415     auto_ptr<const SeqEntryList> sequences(seqs);
00416     auto_ptr<const SeqAnnotList> alignments(alns);
00417 
00418     return CAV_DisplayMultiple(*seqs, *alns, options, paragraphWidth, conservationThreshhold,
00419         title, nFeatures, alnFeatures, outputStream, diagnosticStream);
00420 }
00421 
00422 int CAV_DisplayMultiple(
00423     const void *asnDataBlock,
00424     int asnSize,
00425     unsigned int options,
00426     unsigned int paragraphWidth,
00427     double conservationThreshhold,
00428     const char *title,
00429     int nFeatures,
00430     const AlignmentFeature *features)
00431 {
00432     return CAV_DisplayMultiple(asnDataBlock, asnSize, options, paragraphWidth,
00433         conservationThreshhold, title, nFeatures, features, NULL, NULL);
00434 }
00435 
00436 int CAV_DisplayMultiple(
00437     const ncbi::objects::CNcbi_mime_asn1& mime,
00438     unsigned int options,
00439     unsigned int paragraphWidth,
00440     double conservationThreshhold,
00441     const char *title,
00442     int nFeatures,
00443     const AlignmentFeature *features,
00444     ncbi::CNcbiOstream *outputStream,
00445     ncbi::CNcbiOstream *diagnosticStream)
00446 {
00447     const SeqEntryList *sequences = NULL;
00448     SeqEntryList localSeqList;
00449     const SeqAnnotList *alignments = NULL;
00450 
00451     if (mime.IsStrucseqs()) {
00452         sequences = &(mime.GetStrucseqs().GetSequences());
00453         alignments = &(mime.GetStrucseqs().GetSeqalign());
00454     } else if (mime.IsAlignstruc()) {
00455         sequences = &(mime.GetAlignstruc().GetSequences());
00456         alignments = &(mime.GetAlignstruc().GetSeqalign());
00457     } else if (mime.IsAlignseq()) {
00458         sequences = &(mime.GetAlignseq().GetSequences());
00459         alignments = &(mime.GetAlignseq().GetSeqalign());
00460     } else if (mime.IsGeneral()) {
00461         if (mime.GetGeneral().GetSeq_align_data().IsBundle()) {
00462             sequences = &(mime.GetGeneral().GetSeq_align_data().GetBundle().GetSequences());
00463             alignments = &(mime.GetGeneral().GetSeq_align_data().GetBundle().GetSeqaligns());
00464         } else if (mime.GetGeneral().GetSeq_align_data().IsCdd()) {
00465             localSeqList.resize(1);
00466             localSeqList.front().Reset(const_cast<CSeq_entry*>(&(mime.GetGeneral().GetSeq_align_data().GetCdd().GetSequences())));
00467             sequences = &localSeqList;
00468             alignments = &(mime.GetGeneral().GetSeq_align_data().GetCdd().GetSeqannot());
00469         }
00470     }
00471 
00472     if (!sequences || !alignments) {
00473         ERR_POST_X(31, Error << "Ncbi-mime-asn1 object is not of recognized type");
00474         return CAV_ERROR_BAD_ASN;
00475     }
00476 
00477     return CAV_DisplayMultiple(*sequences, *alignments, options, paragraphWidth, conservationThreshhold,
00478         title, nFeatures, features, outputStream, diagnosticStream);
00479 }
00480 
00481 

Generated on Sun Dec 6 22:41:07 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:21:11 2009 by modify_doxy.py rev. 173732