00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include <ncbi_pch.hpp>
00035 #include <corelib/ncbistl.hpp>
00036 #include <corelib/ncbistre.hpp>
00037 #include <corelib/ncbi_limits.h>
00038 #include <corelib/ncbidiag.hpp>
00039
00040 #include <list>
00041 #include <memory>
00042
00043 #include <objects/cdd/Cdd.hpp>
00044 #include <objects/ncbimime/Ncbi_mime_asn1.hpp>
00045 #include <objects/ncbimime/Biostruc_seqs.hpp>
00046 #include <objects/ncbimime/Biostruc_align.hpp>
00047 #include <objects/ncbimime/Biostruc_align_seq.hpp>
00048 #include <objects/ncbimime/Biostruc_seqs_aligns_cdd.hpp>
00049 #include <objects/ncbimime/Bundle_seqs_aligns.hpp>
00050
00051 #include <objtools/cddalignview/cddalignview.h>
00052 #include <objtools/cddalignview/cav_seqset.hpp>
00053 #include <objtools/cddalignview/cav_alignset.hpp>
00054 #include <objtools/cddalignview/cav_asnio.hpp>
00055 #include <objtools/cddalignview/cav_alndisplay.hpp>
00056 #include <objtools/error_codes.hpp>
00057
00058
00059 #define NCBI_USE_ERRCODE_X Objtools_CAV_Func
00060
00061
00062 BEGIN_NCBI_SCOPE
00063 USING_SCOPE(objects);
00064
// Diagnostic post level chosen by the SeqEntryList/SeqAnnotList overload of
// CAV_DisplayMultiple: the level already in effect when CAV_NO_CHANGE_DIAG is
// given, otherwise eDiag_Info (with CAV_DEBUG) or eDiag_Error. It is
// file-static and therefore shared by every call made through this file.
00065 static EDiagSev defaultDiagPostLevel;
00066
00067 static int LoadASNFromIstream(CNcbiIstream& asnIstream,
00068 const SeqEntryList* *sequences, const SeqAnnotList* *alignments)
00069 {
00070 *sequences = NULL;
00071 *alignments = NULL;
00072
00073
00074 static const string
00075 asciiMimeFirstWord = "Ncbi-mime-asn1",
00076 asciiCDDFirstWord = "Cdd";
00077 bool isMime = false, isCDD = false, isBinary = true;
00078
00079 string firstWord;
00080 asnIstream >> firstWord;
00081 if (firstWord == asciiMimeFirstWord) {
00082 isMime = true;
00083 isBinary = false;
00084 } else if (firstWord == asciiCDDFirstWord) {
00085 isCDD = true;
00086 isBinary = false;
00087 }
00088
00089
00090 auto_ptr<SeqEntryList> newSequences(new SeqEntryList());
00091 auto_ptr<SeqAnnotList> newAlignments(new SeqAnnotList());
00092 bool readOK = false;
00093 string err;
00094
00095 if (!isMime) {
00096
00097
00098 CRef < CCdd > cdd(new CCdd);
00099 SetDiagPostLevel(eDiag_Fatal);
00100 asnIstream.seekg(0);
00101 readOK = ReadASNFromIstream(asnIstream, *cdd, isBinary, err);
00102 SetDiagPostLevel(defaultDiagPostLevel);
00103 if (readOK) {
00104 newSequences->resize(1);
00105 newSequences->front().Reset(&(cdd->SetSequences()));
00106 *newAlignments = cdd->GetSeqannot();
00107 }
00108 }
00109
00110 if (!readOK) {
00111
00112
00113 CRef < CNcbi_mime_asn1 > mime(new CNcbi_mime_asn1);
00114 SetDiagPostLevel(eDiag_Fatal);
00115 asnIstream.seekg(0);
00116 readOK = ReadASNFromIstream(asnIstream, *mime, isBinary, err);
00117 SetDiagPostLevel(defaultDiagPostLevel);
00118 if (readOK) {
00119
00120 if (mime->IsStrucseqs()) {
00121 *newSequences = mime->GetStrucseqs().GetSequences();
00122 *newAlignments = mime->GetStrucseqs().GetSeqalign();
00123 } else if (mime->IsAlignstruc()) {
00124 *newSequences = mime->GetAlignstruc().GetSequences();
00125 *newAlignments = mime->GetAlignstruc().GetSeqalign();
00126 } else if (mime->IsAlignseq()) {
00127 *newSequences = mime->GetAlignseq().GetSequences();
00128 *newAlignments = mime->GetAlignseq().GetSeqalign();
00129 } else if (mime->IsGeneral()) {
00130 if (mime->GetGeneral().GetSeq_align_data().IsBundle()) {
00131 *newSequences = mime->GetGeneral().GetSeq_align_data().GetBundle().GetSequences();
00132 *newAlignments = mime->GetGeneral().GetSeq_align_data().GetBundle().GetSeqaligns();
00133 } else if (mime->GetGeneral().GetSeq_align_data().IsCdd()) {
00134 newSequences->resize(1);
00135 newSequences->front().Reset(&(mime->SetGeneral().SetSeq_align_data().SetCdd().SetSequences()));
00136 *newAlignments = mime->GetGeneral().GetSeq_align_data().GetCdd().GetSeqannot();
00137 }
00138 }
00139 }
00140 }
00141
00142 if (!readOK) {
00143 ERR_POST_X(3, Error << "Input is not a recognized data type (Ncbi-mime-asn1 or Cdd) : " << err);
00144 return CAV_ERROR_BAD_ASN;
00145 }
00146 if (newSequences->size() == 0 || newAlignments->size() == 0) {
00147 ERR_POST_X(4, Error << "Cannot find sequences and alignments in the input data!");
00148 return CAV_ERROR_BAD_ASN;
00149 }
00150
00151 *sequences = newSequences.release();
00152 *alignments = newAlignments.release();
00153 return CAV_SUCCESS;
00154 }
00155
00156
00157
00158
00159 static bool VerifyAlignmentData(const AlignmentSet *alignmentSet, const AlignmentDisplay *display)
00160 {
00161 int alnLoc, masterLoc, slaveLoc, currentMasterLoc, currentSlaveLoc;
00162 char masterChar, slaveChar;
00163 const MasterSlaveAlignment *alignment;
00164
00165 for (unsigned int i=0; i<alignmentSet->alignments.size(); ++i) {
00166 masterLoc = slaveLoc = -1;
00167 alignment = alignmentSet->alignments[i];
00168
00169 for (alnLoc=0; alnLoc<(int)display->GetWidth(); ++alnLoc) {
00170
00171
00172 masterChar = display->GetCharAt(alnLoc, 0);
00173 if (masterChar == '?') {
00174 ERR_POST_X(5, Error << "bad alignment coordinate: loc " << (alnLoc+1) << " row 1 (master)");
00175 return false;
00176 }
00177 slaveChar = display->GetCharAt(alnLoc, 1 + i);
00178 if (slaveChar == '?') {
00179 ERR_POST_X(6, Error << "bad alignment coordinate: loc " << (alnLoc+1) << " row " << (i+2));
00180 return false;
00181 }
00182
00183
00184 if (!IsGap(masterChar)) {
00185 ++masterLoc;
00186 if (i == 0) {
00187 if (masterLoc >= (int) alignment->master->sequenceString.size()) {
00188 ERR_POST_X(7, Error << "master sequence too long at alnLoc " << (alnLoc+1)
00189 << " row " << (i+2) << " masterLoc " << (masterLoc+1));
00190 return false;
00191 } else if (toupper((unsigned char) masterChar) !=
00192 toupper((unsigned char) alignment->master->sequenceString[masterLoc])) {
00193 ERR_POST_X(8, Error << "master sequence mismatch at alnLoc " << (alnLoc+1)
00194 << " row " << (i+2) << " masterLoc " << (masterLoc+1));
00195 return false;
00196 }
00197 }
00198 }
00199 if (!IsGap(slaveChar)) {
00200 ++slaveLoc;
00201 if (slaveLoc >= (int) alignment->slave->sequenceString.size()) {
00202 ERR_POST_X(9, Error << "slave sequence too long at alnLoc " << (alnLoc+1)
00203 << " row " << (i+2) << " slaveLoc " << (slaveLoc+1));
00204 return false;
00205 } else if (toupper((unsigned char) slaveChar) !=
00206 toupper((unsigned char) alignment->slave->sequenceString[slaveLoc])) {
00207 ERR_POST_X(10, Error << "slave sequence mismatch at alnLoc " << (alnLoc+1)
00208 << " row " << (i+2) << " slaveLoc " << (slaveLoc+1));
00209 return false;
00210 }
00211 }
00212 currentMasterLoc = IsGap(masterChar) ? -1 : masterLoc;
00213 currentSlaveLoc = IsGap(slaveChar) ? -1 : slaveLoc;
00214
00215
00216 if (IsGap(slaveChar) || IsUnaligned(slaveChar)) {
00217 if (currentMasterLoc >= 0 && alignment->masterToSlave[currentMasterLoc] != -1) {
00218 ERR_POST_X(11, Error << "slave should be marked aligned at alnLoc " << (alnLoc+1)
00219 << " row " << (i+2));
00220 return false;
00221 }
00222 }
00223 if (IsAligned(slaveChar)) {
00224 if (!IsAligned(masterChar)) {
00225 ERR_POST_X(12, Error <<" slave marked aligned but master unaligned at alnLoc " << (alnLoc+1)
00226 << " row " << (i+2));
00227 return false;
00228 }
00229 if (alignment->masterToSlave[currentMasterLoc] == -1) {
00230 ERR_POST_X(13, Error << "slave incorrectly marked aligned at alnLoc " << (alnLoc+1)
00231 << " row " << (i+2));
00232 return false;
00233 }
00234 if (alignment->masterToSlave[currentMasterLoc] != currentSlaveLoc) {
00235 ERR_POST_X(14, Error << "wrong slave residue aligned at alnLoc " << (alnLoc+1)
00236 << " row " << (i+2));
00237 return false;
00238 }
00239 }
00240
00241
00242 if (!IsGap(masterChar)) {
00243 if (alignment->masterToSlave[currentMasterLoc] == -1) {
00244 if (IsAligned(slaveChar)) {
00245 ERR_POST_X(15, Error << "slave should be unaligned at alnLoc " << (alnLoc+1)
00246 << " row " << (i+2));
00247 return false;
00248 }
00249 } else {
00250 if (!IsAligned(slaveChar)) {
00251 ERR_POST_X(16, Error << "slave should be aligned at alnLoc " << (alnLoc+1)
00252 << " row " << (i+2));
00253 return false;
00254 }
00255 if (currentSlaveLoc != alignment->masterToSlave[currentMasterLoc]) {
00256 ERR_POST_X(17, Error << "wrong slave residue aligned to master at alnLoc " << (alnLoc+1)
00257 << " row " << (i+2));
00258 return false;
00259 }
00260 }
00261 }
00262 }
00263
00264
00265 if (masterLoc != alignment->master->sequenceString.size() - 1 ||
00266 slaveLoc != alignment->slave->sequenceString.size() - 1) {
00267 ERR_POST_X(18, Error << "bad sequence lengths at row " << (i+2));
00268 return false;
00269 }
00270 }
00271 return true;
00272 }
00273
00274 END_NCBI_SCOPE
00275
00276
00277
00278
00279 USING_NCBI_SCOPE;
00280
00281 int CAV_DisplayMultiple(
00282 const SeqEntryList& sequences,
00283 const SeqAnnotList& alignments,
00284 unsigned int options,
00285 unsigned int paragraphWidth,
00286 double conservationThreshhold,
00287 const char *title,
00288 int nFeatures,
00289 const AlignmentFeature *alnFeatures,
00290 CNcbiOstream *outputStream,
00291 CNcbiOstream *diagnosticStream)
00292 {
00293
00294 IOS_BASE::sync_with_stdio(true);
00295
00296
00297 CNcbiOstream *outStream;
00298 if (outputStream)
00299 outStream = outputStream;
00300 else
00301 outStream = &NcbiCout;
00302 if (options & CAV_NO_CHANGE_DIAG) {
00303 defaultDiagPostLevel = SetDiagPostLevel();
00304 SetDiagPostLevel(defaultDiagPostLevel);
00305 } else {
00306 if (diagnosticStream)
00307 SetDiagStream(diagnosticStream);
00308 else
00309 SetDiagStream(&NcbiCerr);
00310 if (options & CAV_DEBUG)
00311 SetDiagPostLevel(defaultDiagPostLevel = eDiag_Info);
00312 else
00313 SetDiagPostLevel(defaultDiagPostLevel = eDiag_Error);
00314 }
00315
00316
00317 if (options & CAV_CONDENSED && !(options & CAV_TEXT || options & CAV_HTML)) {
00318 ERR_POST_X(19, Error << "Cannot do condensed display except with text/HTML output");
00319 return CAV_ERROR_BAD_PARAMS;
00320 }
00321 if (options & CAV_FASTA_LOWERCASE && !(options & CAV_FASTA)) {
00322 ERR_POST_X(20, Error << "Cannot do fasta_lc option except with FASTA output");
00323 return CAV_ERROR_BAD_PARAMS;
00324 }
00325 if (options & CAV_HTML_HEADER && !(options & CAV_HTML)) {
00326 ERR_POST_X(21, Error << "Cannot do HTML header without HTML output");
00327 return CAV_ERROR_BAD_PARAMS;
00328 }
00329
00330
00331 auto_ptr<SequenceSet> sequenceSet(new SequenceSet(sequences));
00332 if (!sequenceSet.get() || sequenceSet->Status() != CAV_SUCCESS) {
00333 ERR_POST_X(22, Critical << "Error processing sequence data");
00334 return sequenceSet->Status();
00335 }
00336 auto_ptr<AlignmentSet> alignmentSet(new AlignmentSet(sequenceSet.get(), alignments));
00337 if (!alignmentSet.get() || alignmentSet->Status() != CAV_SUCCESS) {
00338 ERR_POST_X(23, Critical << "Error processing alignment data");
00339 return alignmentSet->Status();
00340 }
00341
00342
00343 auto_ptr<AlignmentDisplay> display(new AlignmentDisplay(sequenceSet.get(), alignmentSet.get()));
00344 if (!display.get() || display->Status() != CAV_SUCCESS) {
00345 ERR_POST_X(24, Critical << "Error creating alignment display");
00346 return display->Status();
00347 }
00348
00349
00350 if (options & CAV_DEBUG) {
00351 if (!VerifyAlignmentData(alignmentSet.get(), display.get())) {
00352 ERR_POST_X(25, Critical << "AlignmentDisplay failed verification");
00353 return CAV_ERROR_DISPLAY;
00354 } else {
00355 ERR_POST_X(26, Info << "AlignmentDisplay passed verification");
00356 }
00357 }
00358
00359
00360 ERR_POST_X(27, Info << "writing output...");
00361 int
00362 from = (options & CAV_LEFTTAILS) ? 0 : display->GetFirstAlignedLoc(),
00363 to = (options & CAV_RIGHTTAILS) ? display->GetWidth()-1 : display->GetLastAlignedLoc();
00364 if (options & CAV_SHOW_IDENTITY) conservationThreshhold = AlignmentDisplay::SHOW_IDENTITY;
00365 int retval = CAV_ERROR_BAD_PARAMS;
00366 if (options & CAV_TEXT || options & CAV_HTML) {
00367 if (options & CAV_CONDENSED)
00368 retval = display->DumpCondensed(*outStream, options,
00369 from, to, paragraphWidth, conservationThreshhold, title, nFeatures, alnFeatures);
00370 else
00371 retval = display->DumpText(*outStream, options,
00372 from, to, paragraphWidth, conservationThreshhold, title, nFeatures, alnFeatures);
00373 } else if (options & CAV_FASTA) {
00374 retval = display->DumpFASTA(from, to, paragraphWidth,
00375 ((options & CAV_FASTA_LOWERCASE) > 0), *outStream);
00376 }
00377
00378 if (retval != CAV_SUCCESS) {
00379 ERR_POST_X(28, Error << "Error dumping display to output");
00380 return retval;
00381 }
00382
00383 return CAV_SUCCESS;
00384 }
00385
00386 int CAV_DisplayMultiple(
00387 const void *asnDataBlock,
00388 int asnSize,
00389 unsigned int options,
00390 unsigned int paragraphWidth,
00391 double conservationThreshhold,
00392 const char *title,
00393 int nFeatures,
00394 const AlignmentFeature *alnFeatures,
00395 CNcbiOstream *outputStream,
00396 CNcbiOstream *diagnosticStream)
00397 {
00398
00399 if (!asnDataBlock) {
00400 ERR_POST_X(29, Critical << "NULL asnDataBlock parameter");
00401 return CAV_ERROR_BAD_ASN;
00402 }
00403 CNcbiIstrstream asnIstrstream(static_cast<const char*>(asnDataBlock), asnSize);
00404
00405
00406 const SeqEntryList *seqs;
00407 const SeqAnnotList *alns;
00408 int retval = LoadASNFromIstream(asnIstrstream, &seqs, &alns);
00409 if (retval != CAV_SUCCESS) {
00410 ERR_POST_X(30, Critical << "Couldn't get sequence and alignment ASN data");
00411 return retval;
00412 }
00413
00414
00415 auto_ptr<const SeqEntryList> sequences(seqs);
00416 auto_ptr<const SeqAnnotList> alignments(alns);
00417
00418 return CAV_DisplayMultiple(*seqs, *alns, options, paragraphWidth, conservationThreshhold,
00419 title, nFeatures, alnFeatures, outputStream, diagnosticStream);
00420 }
00421
00422 int CAV_DisplayMultiple(
00423 const void *asnDataBlock,
00424 int asnSize,
00425 unsigned int options,
00426 unsigned int paragraphWidth,
00427 double conservationThreshhold,
00428 const char *title,
00429 int nFeatures,
00430 const AlignmentFeature *features)
00431 {
00432 return CAV_DisplayMultiple(asnDataBlock, asnSize, options, paragraphWidth,
00433 conservationThreshhold, title, nFeatures, features, NULL, NULL);
00434 }
00435
00436 int CAV_DisplayMultiple(
00437 const ncbi::objects::CNcbi_mime_asn1& mime,
00438 unsigned int options,
00439 unsigned int paragraphWidth,
00440 double conservationThreshhold,
00441 const char *title,
00442 int nFeatures,
00443 const AlignmentFeature *features,
00444 ncbi::CNcbiOstream *outputStream,
00445 ncbi::CNcbiOstream *diagnosticStream)
00446 {
00447 const SeqEntryList *sequences = NULL;
00448 SeqEntryList localSeqList;
00449 const SeqAnnotList *alignments = NULL;
00450
00451 if (mime.IsStrucseqs()) {
00452 sequences = &(mime.GetStrucseqs().GetSequences());
00453 alignments = &(mime.GetStrucseqs().GetSeqalign());
00454 } else if (mime.IsAlignstruc()) {
00455 sequences = &(mime.GetAlignstruc().GetSequences());
00456 alignments = &(mime.GetAlignstruc().GetSeqalign());
00457 } else if (mime.IsAlignseq()) {
00458 sequences = &(mime.GetAlignseq().GetSequences());
00459 alignments = &(mime.GetAlignseq().GetSeqalign());
00460 } else if (mime.IsGeneral()) {
00461 if (mime.GetGeneral().GetSeq_align_data().IsBundle()) {
00462 sequences = &(mime.GetGeneral().GetSeq_align_data().GetBundle().GetSequences());
00463 alignments = &(mime.GetGeneral().GetSeq_align_data().GetBundle().GetSeqaligns());
00464 } else if (mime.GetGeneral().GetSeq_align_data().IsCdd()) {
00465 localSeqList.resize(1);
00466 localSeqList.front().Reset(const_cast<CSeq_entry*>(&(mime.GetGeneral().GetSeq_align_data().GetCdd().GetSequences())));
00467 sequences = &localSeqList;
00468 alignments = &(mime.GetGeneral().GetSeq_align_data().GetCdd().GetSeqannot());
00469 }
00470 }
00471
00472 if (!sequences || !alignments) {
00473 ERR_POST_X(31, Error << "Ncbi-mime-asn1 object is not of recognized type");
00474 return CAV_ERROR_BAD_ASN;
00475 }
00476
00477 return CAV_DisplayMultiple(*sequences, *alignments, options, paragraphWidth, conservationThreshhold,
00478 title, nFeatures, features, outputStream, diagnosticStream);
00479 }
00480
00481