src/algo/structure/cd_utils/cuAlign.cpp

Go to the documentation of this file.
00001 /* $Id: cuAlign.cpp 162405 2009-06-05 13:38:53Z lanczyck $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Author:  Adapted from CDTree-1 code by Chris Lanczycki
00027  *
00028  * File Description:
00029  *        
00030  *          Utility routines for manipulating alignments.
00031  *
00032  * ===========================================================================
00033  */
00034 #include <ncbi_pch.hpp>
00035 #include <algo/structure/cd_utils/cuCppNCBI.hpp>
00036 
00037 #include <objects/seq/Seq_annot.hpp>
00038 #include <objects/seqalign/Seq_align_set.hpp>
00039 #include <objects/seqalign/Dense_diag.hpp>
00040 #include <objects/seqalign/Dense_seg.hpp>
00041 #include <objects/seqalign/Score.hpp>
00042 #include <objects/seqloc/Seq_interval.hpp>
00043 #include <objects/seqloc/Seq_loc.hpp>
00044 #include <objects/seqloc/PDB_seq_id.hpp>
00045 #include <objects/seqloc/PDB_mol_id.hpp>
00046 #include <objects/general/Object_id.hpp>
00047 
00048 #include <algo/structure/cd_utils/cuSequence.hpp>
00049 #include <algo/structure/cd_utils/cuUtils.hpp>
00050 #include <algo/structure/cd_utils/cuAlign.hpp>
00051 
00052 #include <stdio.h>
00053 
00054 BEGIN_NCBI_SCOPE
00055 BEGIN_SCOPE(cd_utils)
00056 
00057 bool GetSeqID(const CRef< CSeq_align >& seqAlign, CRef< CSeq_id >& SeqID, bool getSlave)
00058 {
00059     //-------------------------------------------------------------------------
00060     // get a SeqID.
00061     // first get the row'th DenDiag, then the Slave/Master's SeqID.
00062     //-------------------------------------------------------------------------
00063     CRef< CDense_diag > DenDiag;
00064     CDense_diag::TIds IdsSet;
00065     CDense_diag::TIds::iterator i;
00066 
00067     if (seqAlign.NotEmpty()) {
00068         if (seqAlign->GetSegs().IsDendiag() && GetFirstOrLastDenDiag(seqAlign, true, DenDiag)) {
00069             IdsSet = DenDiag->GetIds();
00070         } else if (seqAlign->GetSegs().IsDenseg()) {
00071             IdsSet = seqAlign->GetSegs().GetDenseg().GetIds();
00072         }
00073         i = IdsSet.begin();
00074         if (getSlave) 
00075         {
00076             i++;
00077         }
00078         SeqID = (*i);
00079         return(true);
00080     }
00081     return(false);
00082 }
00083 
00084 bool HasSeqID(const CRef< CSeq_align >& seqAlign, const CRef< CSeq_id >& SeqID, bool& isMaster)
00085 {
00086     //-------------------------------------------------------------------------
00087     // get a SeqID.
00088     // first get the row'th DenDiag, then the Slave/Master's SeqID.
00089     //-------------------------------------------------------------------------
00090     bool hasMatch = false;
00091     CRef< CDense_diag > DenDiag;
00092     CDense_diag::TIds IdsSet;
00093     CDense_diag::TIds::iterator i;
00094 
00095     if (seqAlign.NotEmpty()) {
00096         if (seqAlign->GetSegs().IsDendiag() && GetFirstOrLastDenDiag(seqAlign, true, DenDiag)) {
00097             IdsSet = DenDiag->GetIds();
00098         } else if (seqAlign->GetSegs().IsDenseg()) {
00099             IdsSet = seqAlign->GetSegs().GetDenseg().GetIds();
00100         }
00101         i = IdsSet.begin();
00102         while (!hasMatch && i != IdsSet.end()) {
00103             if (SeqIdsMatch(SeqID, *i)) {
00104                 hasMatch = true;
00105             }
00106             ++i;
00107         }
00108         isMaster = (hasMatch && (--i == IdsSet.begin()));
00109     }
00110     return(hasMatch);
00111 }
00112 
00113 int  SeqAlignRemap(CRef< CSeq_align >& source, int iSeq, CRef< CSeq_align >& guide, int iMaster, CRef< CSeq_align >& mappedAlign, int iMasterNew, int iSeqNew, int flags, string& err) {
00114     int nBlocks = 0;
00115     TDendiag mappedAlignDD;
00116     TDendiag *sourceDD, *guideDD;
00117 
00118     //  Sanity checks on input.
00119     err.erase();
00120     if (source.Empty()) {
00121         err = "SeqAlignRemap:  Empty alignment in source.\n";
00122     } else if (guide.Empty()) {
00123         err = "SeqAlignRemap:  Empty alignment in guide.\n";
00124     } else if (mappedAlign.Empty()) {
00125         err = "SeqAlignRemap:  Empty alignment in target mapped alignment.\n";
00126     }
00127     if (err.size() > 0) return nBlocks;
00128 
00129     if (source->GetDim() != guide->GetDim()) {
00130         err = "SeqAlignRemap:  Inconsistent dimensions for source and guide alignments.\n";
00131 //    } else if (source->GetType() != guide->GetType()) {
00132 //        err = "SeqAlignRemap:  Inconsistent types for source and guide alignments.\n";
00133     } else if (source->GetSegs().Which() != guide->GetSegs().Which()) {
00134         err = "SeqAlignRemap:  Inconsistent segment types for source and guide alignments.\n";
00135     }
00136     if (err.size() > 0) return nBlocks;
00137 
00138     /*
00139     bool dummy = false;
00140     if (dummy) {
00141         string err("");
00142         if (!WriteASNToFile("sourceAlign.aln", *source, false, &err)) {
00143             int i = 0;
00144         } 
00145         if (!WriteASNToFile("guideAlign.aln", *guide, false, &err)) {
00146             int i = 0;
00147         } 
00148     }
00149     */
00150 
00151     if (GetDDSetFromSeqAlign(*source, sourceDD) && GetDDSetFromSeqAlign(*guide, guideDD)) {
00152         mappedAlign->SetType(source->GetType());
00153         mappedAlign->SetDim(source->GetDim());
00154         mappedAlign->SetSegs().Select(source->GetSegs().Which());
00155         nBlocks = ddRemap(sourceDD, iSeq, guideDD, iMaster, &mappedAlignDD, iMasterNew, iSeqNew, flags, err);
00156         for (TDendiag::iterator ddIt = mappedAlignDD.begin(); ddIt != mappedAlignDD.end(); ++ddIt) {
00157             mappedAlign->SetSegs().SetDendiag().push_back(*ddIt);
00158         }
00159     }
00160 
00161     /*
00162     if (dummy) {
00163         string err("");
00164         if (!WriteASNToFile("mappedAlign.aln", *mappedAlign, false, &err)) {
00165             int i = 0;
00166         }
00167     } 
00168     */
00169 
00170     return nBlocks;
00171 }
00172 
00173 
00174 void MakeMaskedSeqAlign(const CRef< CSeq_align >& originalAlign, const CRef< CSeq_align >& maskAlign, CRef< CSeq_align >& maskedAlign, bool useMaskMaster, bool invertMask) {
00175     
00176     
00177     bool inputOK = true;
00178     bool isAligned, useOriginalMaster;
00179     TSeqPos newMasterStart, newSlaveStart, newLength;
00180     TSeqPos originalMasterStart, originalSlaveStart, originalLength, blockStart;
00181     
00182     CRef< CSeq_id > masterId, slaveId, maskId;
00183     const TDendiag* originalDDSet;
00184     TDendiag* maskedDDSet;
00185     TDendiag_cit originalBlock, originalBlock_end;
00186     
00187     if (originalAlign.Empty() || !GetSeqID(originalAlign, masterId, false) 
00188                               || !GetSeqID(originalAlign, slaveId, true)
00189                               || !GetDDSetFromSeqAlign(*originalAlign, originalDDSet)) {
00190         inputOK = false;
00191     } else if (maskAlign.Empty() || !GetSeqID(maskAlign, maskId, !useMaskMaster)) {
00192         inputOK = false;
00193     } else if (maskedAlign.Empty()) {
00194         inputOK = false;
00195     }
00196     
00197     if (SeqIdsMatch(masterId, maskId)) {
00198         useOriginalMaster = true;
00199     } else if (SeqIdsMatch(slaveId, maskId)) {
00200         useOriginalMaster = false;
00201     } else {
00202         useOriginalMaster = false;
00203         inputOK = false;
00204     }
00205 
00206     
00207     //  Set items from original alignment; optional 'score' and 'bounds'
00208     //  are not copied over as they're likely to be inconsistent once
00209     //  the mask has been applied.  Similarly, do not attempt to fill in
00210     //  the 'strands' and 'scores' fields of the individual new dense diags.
00211     maskedAlign->SetType(originalAlign->GetType());
00212     maskedAlign->SetDim(originalAlign->GetDim());
00213     maskedAlign->SetSegs().Select(originalAlign->GetSegs().Which());
00214     if (inputOK && GetDDSetFromSeqAlign(*maskedAlign, maskedDDSet)) {
00215         
00216         
00217         originalBlock_end = originalDDSet->end();
00218         for (originalBlock = originalDDSet->begin(); originalBlock != originalBlock_end; ++originalBlock) {
00219             
00220             originalMasterStart = (*originalBlock)->GetStarts().front();
00221             originalSlaveStart  = (*originalBlock)->GetStarts().back();
00222             originalLength      = (*originalBlock)->GetLen();
00223             newLength           = 0;
00224             newMasterStart      = 0;
00225             newSlaveStart       = 0;
00226             
00227             //  At each position on the original alignment, check if position is aligned.
00228             //  If so, start a new block on the masked alignment if not already in one.
00229             //  If not, add current block to masked alignment if it's the first unaligned
00230             //  residue after a string of aligned residues.  After scan block, check for
00231             //  a masked block stretching to the C-terminus of the original block.
00232             blockStart = (( useOriginalMaster) ? originalMasterStart : originalSlaveStart);
00233             for (TSeqPos blockPos = 0; blockPos < originalLength; ++blockPos) {
00234                 isAligned = IsPositionAligned(*maskAlign, blockPos + blockStart, useMaskMaster);
00235                 if ((isAligned && !invertMask) || (!isAligned && invertMask)) {
00236                     if (newLength == 0) {
00237                         newMasterStart = blockPos + blockStart;
00238                         newSlaveStart  = blockPos + ((!useOriginalMaster) ? originalMasterStart : originalSlaveStart);
00239                     }
00240                     ++newLength;
00241                 } else if (newLength > 0) {
00242                     AddIntervalToDD(maskedDDSet, masterId, slaveId, newMasterStart, newSlaveStart, newLength);
00243                     newLength = 0;
00244                 }
00245             }
00246             if (newLength > 0) {
00247                 AddIntervalToDD(maskedDDSet, masterId, slaveId, newMasterStart, newSlaveStart, newLength);
00248             }
00249             
00250         }
00251 
00252     }
00253 
00254 }
00255 
00256 
00257 bool SeqAlignsAreEquivalent(const CRef< CSeq_align >& align1, const CRef< CSeq_align >& align2, bool checkMasters) {
00258     bool result = false;
00259     const TDendiag* ddSet1;
00260     const TDendiag* ddSet2;
00261 
00262     if (GetDDSetFromSeqAlign(*align1, ddSet1) && GetDDSetFromSeqAlign(*align2, ddSet2)) {
00263         result = ddAreEquivalent(ddSet1, ddSet2, checkMasters);
00264     }
00265     return result;
00266 }
00267 
00268 void SeqAlignSwapMasterSlave(CRef< CSeq_align >& seqAlign, CRef< CSeq_align >& swappedSeqAlign) {
00269 
00270     int result = 0;
00271     TDendiag* originalDDSet;
00272     TDendiag* swappedDDSet;
00273 
00274     swappedSeqAlign->Assign(*seqAlign);  //  copies over the non-DD data
00275     if (GetDDSetFromSeqAlign(*seqAlign, originalDDSet) && GetDDSetFromSeqAlign(*swappedSeqAlign, swappedDDSet)) {
00276         swappedDDSet->clear();
00277         result = ddRecompose(originalDDSet, 1, 0, swappedDDSet);
00278     }
00279 }
00280 
00281 //  Assumes CDD-style seq-align using Dendiags with dimension 2.
00282 bool ChangeSeqIdInSeqAlign(CRef< CSeq_align>& sa, const CRef< CSeq_id >& newSeqId, bool onMaster)
00283 {
00284     bool result = (sa->SetSegs().IsDendiag() && sa->SetSegs().SetDendiag().size() > 0);
00285     TDendiag_it ddIt, ddEnd;
00286     unsigned int index = (onMaster) ? 0 : 1;
00287 
00288     //  Sanity check the dendiag...
00289     if (result) {
00290         ddIt  = sa->SetSegs().SetDendiag().begin();
00291         ddEnd = sa->SetSegs().SetDendiag().end();
00292         for (; ddIt != ddEnd; ++ddIt) {
00293             if ((*ddIt)->GetDim() != 2 || (*ddIt)->GetIds().size() != 2) {
00294                 result = false;
00295                 break;
00296             }
00297         }
00298     }
00299 
00300     if (result) {
00301         ddIt  = sa->SetSegs().SetDendiag().begin();
00302         ddEnd = sa->SetSegs().SetDendiag().end();
00303         CDense_diag::TIds ids;
00304         for (; ddIt != ddEnd; ++ddIt) {
00305             ids = (*ddIt)->SetIds();
00306             ids[index]->Assign(*newSeqId);
00307         }
00308     }
00309 
00310     return result;
00311 }
00312 
00313 
00314 //  convenience function
00315 int MapPositionToMaster(int childPos, const CSeq_align&  align) {
00316 
00317     return MapPosition(align, childPos, CHILD_TO_MASTER);
00318 }
00319 
00320 //  convenience function
00321 int MapPositionToChild(int masterPos, const CSeq_align&  align) {
00322 
00323     return MapPosition(align, masterPos, MASTER_TO_CHILD);
00324 }
00325 
00326 /*  RENAME  */   
00327 //  Was CCdCore::GetSeqPosition(const TDendiag* ddlist, int Position, bool OnMasterRow) {
00328 int MapPosition(const CSeq_align& seqAlign, int Position, CoordMapDir mapDir) {
00329 //---------------------------------------------------------------------------
00330 // If mapDir = MASTER_TO_CHILD, then get position on slave 
00331 // row that corresponds to Position on master row.  Otherwise (i.e. CHILD_TO_MASTER),
00332 // get position on master row that corresponds to Position on slave row.
00333 // Assumes the Seq_align is a standard CD alignment of two sequences.
00334 //---------------------------------------------------------------------------
00335   TDendiag_cit  i, ddend;
00336   CDense_diag::TStarts::const_iterator  k;
00337   int  Start, Len, OtherStart;
00338 
00339   const TDendiag* ddlist; // = new TDendiag;
00340   if (GetDDSetFromSeqAlign(seqAlign, ddlist)) {
00341 
00342     ddend = ddlist->end();
00343     for (i=ddlist->begin(); i!=ddend; i++) {
00344         k = (*i)->GetStarts().begin();
00345         Len = (*i)->GetLen();
00346         Start = (mapDir == MASTER_TO_CHILD) ? *k : *(++k);
00347 //      Start = OnMasterRow ? *k : *(++k);
00348         k = (*i)->GetStarts().begin();
00349         OtherStart = (mapDir == MASTER_TO_CHILD) ? *(++k)  : *k;
00350 //      OtherStart = OnMasterRow ? *(++k)  : *k;
00351         if ((Position >= Start) && (Position < (Start+Len))) {
00352             return(OtherStart + (Position-Start));
00353         }
00354     }
00355   }
00356 //  delete ddlist;
00357   return(INVALID_POSITION);
00358 }
00359 
00360 
00361 
00362 /*  ADDED  */  
00363 bool IsPositionAligned(const CSeq_align& seqAlign, int Position, bool onMaster) {
00364     bool result = false;
00365 
00366     if (Position == INVALID_POSITION) {
00367         return result;
00368     }
00369 
00370     const TDendiag* pDenDiagSet;  // = new TDendiag;
00371     if (GetDDSetFromSeqAlign(seqAlign, pDenDiagSet)) {
00372         result = IsPositionAligned(pDenDiagSet, Position, onMaster);
00373     }
00374     return result;
00375 }
00376 
00377 /*  ADDED  */  
00378 bool IsPositionAligned(const TDendiag*& pDenDiagSet, int Position, bool onMaster) {
00379     bool result = false;
00380     int start, stop;
00381     TDendiag_cit i, iend;
00382 
00383     if (Position == INVALID_POSITION) {
00384         return result;
00385     }
00386 
00387     //  for each block, check if Position is in range
00388     if (pDenDiagSet) {
00389         iend = pDenDiagSet->end();
00390         for (i=pDenDiagSet->begin(); i!=iend; i++) {
00391             start = (onMaster) ? (*i)->GetStarts().front() : (*i)->GetStarts().back();
00392             stop  = start + (*i)->GetLen() - 1;
00393             if (Position >= start && Position <= stop) {
00394                 result = true;
00395                 break;
00396             }
00397         }
00398     }
00399     return result;
00400 }
00401 
00402 //   Return the number of positions of align1 also aligned on align2.
00403 int  GetAlignedPositions(const CRef< CSeq_align >& align1, const CRef< CSeq_align >& align2, vector<int>& alignedPositions, bool onMaster) {
00404 
00405     int nBlocks, position;
00406     CRef< CSeq_id > align1Id,  align2Id;
00407     vector<int> align1Blocks, align1Starts;
00408 
00409     alignedPositions.clear();
00410 
00411     if (align1.NotEmpty() && align2.NotEmpty()) {
00412     
00413         //  The sequences need to be the same to check common positions.
00414         if (GetSeqID(align1, align1Id, !onMaster) && GetSeqID(align2, align2Id, !onMaster) && 
00415             SeqIdsMatch(align1Id, align2Id)) {
00416         
00417             GetBlockLengths(align1, align1Blocks);
00418             GetBlockStarts(align1, align1Starts, onMaster);
00419         
00420             //  Look for residues from align1 aligned on align2.
00421             nBlocks = align1Blocks.size();
00422             for (int i = 0; i < nBlocks; ++i) {
00423                 position = align1Starts[i];
00424                 for (int j = 0; j < align1Blocks[i]; ++j) {
00425                     if (IsPositionAligned(*align2, position, onMaster)) {
00426                         alignedPositions.push_back(position);
00427                     }
00428                     ++position;
00429                 }
00430             }
00431         }
00432     }
00433     return alignedPositions.size();
00434 }
00435 
00436 
00437 int  GetNumAlignedResidues(const CRef< CSeq_align >& seqAlign) {
00438 
00439   TDendiag_cit i;
00440   int  Len=0;
00441 
00442   if (seqAlign.Empty()) {
00443       return Len;
00444   }
00445 
00446   // get den-diags for master row; sum lengths
00447   const TDendiag* pDenDiagSet;  // = new TDendiag;
00448   if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00449     for (i=pDenDiagSet->begin(); i!=pDenDiagSet->end(); i++) {
00450       Len += (*i)->GetLen();
00451     }
00452   }
00453   return(Len);
00454 
00455 }
00456 
00457 int  GetLowerBound(const CRef< CSeq_align >& seqAlign, bool onMaster) {
00458 
00459     int  lowerBound = -1;
00460     if (seqAlign.Empty()) {
00461         return lowerBound;
00462     }
00463 
00464     const TDendiag* pDenDiagSet;  // = new TDendiag;
00465     if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00466         lowerBound = (onMaster) ? pDenDiagSet->front()->GetStarts().front() : pDenDiagSet->front()->GetStarts().back();
00467     }
00468     return(lowerBound);
00469 
00470 }
00471 
00472 int  GetUpperBound(const CRef< CSeq_align >& seqAlign, bool onMaster) {
00473     int  upperBound = -1;
00474     if (seqAlign.Empty()) {
00475         return upperBound;
00476     }
00477 
00478     const TDendiag* pDenDiagSet;  // = new TDendiag;
00479     if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00480         upperBound = (onMaster) ? pDenDiagSet->back()->GetStarts().front() : pDenDiagSet->back()->GetStarts().back();
00481         upperBound += pDenDiagSet->back()->GetLen() - 1;
00482     }
00483     return(upperBound);
00484 
00485 }
00486 
00487 //  Use the alignment to extract as a single string those residues that are aligned.
00488 //  If pAlignedRes hasn't been allocated, do so.
00489 void SetAlignedResiduesOnSequence(const CRef< CSeq_align >& align, const string& sequenceString, char*& pAlignedRes, bool isMaster) {
00490 
00491     int length;
00492     int alignedResCtr = 0;
00493     int start = -1, stop = -1;
00494     CRef< CDense_diag > ddFirst, ddLast;
00495     
00496     if (align.Empty() || sequenceString.size() < 1) {
00497         return;
00498     }
00499 
00500     length = GetNumAlignedResidues(align);
00501     if (length < 1 || (int) sequenceString.size() < length) {
00502         return;
00503     } else {
00504         //  Allocate space for pAlignedRes if not already done
00505         if (!pAlignedRes) {
00506             pAlignedRes = new char[length];
00507             if (!pAlignedRes) return;
00508         }
00509     }
00510 
00511     if (GetFirstOrLastDenDiag(align, true, ddFirst) && GetFirstOrLastDenDiag(align, false, ddLast)) {
00512         if (ddFirst.NotEmpty() && ddLast.NotEmpty()) {
00513             start = (isMaster) ? ddFirst->GetStarts().front() : ddFirst->GetStarts().back();
00514             stop  = (isMaster) ? ddLast->GetStarts().front()  : ddLast->GetStarts().back();
00515             stop += ddLast->GetLen() - 1;
00516         }
00517     }
00518 
00519     alignedResCtr = 0;
00520     const TDendiag* pDenDiagSet;  // = new TDendiag;
00521     if (GetDDSetFromSeqAlign(*align, pDenDiagSet)) {
00522 //        if (start >=0 && start < length && stop >=0 && stop < length) {
00523         if (start >=0 && start <= stop && stop < (int) sequenceString.size()) {
00524             for (int i = start; i <= stop; ++i) {
00525                 if (IsPositionAligned(pDenDiagSet, i, isMaster) && alignedResCtr < length) {
00526                     //ASSERT(alignedResCtr < length);
00527                     pAlignedRes[alignedResCtr] = sequenceString[i];
00528                     ++alignedResCtr;
00529                 }
00530             }
00531         }
00532     }
00533 
00534     //  problem if alignedResCtr != length; return null pointer
00535     if (alignedResCtr != length) {
00536         delete pAlignedRes;
00537         pAlignedRes = NULL;
00538     }
00539     
00540 }
00541 
00542 
00543 //===========================================
00544 //  Queries on block structure of alignment
00545 //===========================================
00546 
00547 /*  ADDED  10/28/03 */
00548 //  return block number containing residue, or -1 if not aligned or out of range.
00549 int GetBlockNumberForResidue(int residue, const CRef< CSeq_align >& seqAlign, bool onMaster,
00550                              vector<int>* starts, vector<int>* lengths) {
00551     int i = 0;
00552     int result = -1, nBlocks;
00553     vector<int> vstarts, vlengths;
00554 
00555     if (residue >= 0 && GetBlockLengths(seqAlign, vlengths) > 0 && GetBlockStarts(seqAlign, vstarts, onMaster) > 0) {
00556         if (vlengths.size() == vstarts.size()) {
00557             nBlocks = vstarts.size();
00558             while (i < nBlocks && result < 0) {
00559                 if (residue >= vstarts[i] && residue < vstarts[i] + vlengths[i]) {
00560                     result = i;
00561                 }
00562                 ++i;
00563             }
00564             if (starts != NULL) {
00565                 starts->insert(starts->begin(), vstarts.begin(), vstarts.end());
00566             }
00567             if (lengths != NULL) {
00568                 lengths->insert(lengths->begin(), vlengths.begin(), vlengths.end());
00569             }
00570         }
00571     }
00572     return result;
00573 }
00574 
00575 /*  ADDED  */
00576 // return number of blocks in alignment (0 if no alignment, or not a Dense_diag)
00577 int GetBlockCount(const CRef< CSeq_align >& seqAlign) {
00578     int nBlocks = 0;
00579     if (seqAlign.Empty()) {
00580         return nBlocks;
00581     }
00582     if (seqAlign->GetSegs().IsDendiag()) {
00583         nBlocks = seqAlign->GetSegs().GetDendiag().size();
00584     }
00585     return nBlocks;
00586 }
00587 
00588 
00589 //  return number of blocks on success; return 0 on error
00590 int GetBlockLengths(const CRef< CSeq_align >& seqAlign, vector<int>& lengths) {
00591     int count = 0;
00592     int nBlocks = GetBlockCount(seqAlign);
00593     const TDendiag* pDenDiagSet = NULL;
00594     TDendiag_cit cit;
00595 
00596     if (seqAlign.NotEmpty() && nBlocks > 0) {
00597         lengths.clear();
00598         if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00599             for (cit = pDenDiagSet->begin(); cit != pDenDiagSet->end(); ++cit) {
00600                 lengths.push_back((*cit)->GetLen());
00601                 count++;
00602             }
00603         }
00604     } 
00605     count = (count == nBlocks) ? count: 0;
00606     return count;
00607 }
00608 
00609 
00610 //  convenience method; return number of blocks on success; return 0 on error
00611 int GetBlockStartsForMaster(const CRef< CSeq_align >& seqAlign, vector<int>& starts) {
00612     return GetBlockStarts(seqAlign, starts, true);
00613 }
00614 
00615 
00616 //  return number of blocks on success; return 0 on error
00617 int GetBlockStarts(const CRef< CSeq_align >& seqAlign, vector<int>& starts, bool onMaster) {
00618     int start;
00619     int count = 0;
00620     int nBlocks = GetBlockCount(seqAlign);
00621     const TDendiag* pDenDiagSet = NULL;
00622     TDendiag_cit cit;
00623 
00624     if (seqAlign.NotEmpty() && nBlocks > 0) {
00625         starts.clear();
00626         if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00627             for (cit = pDenDiagSet->begin(); cit != pDenDiagSet->end(); ++cit) {
00628                 start = (onMaster) ? (*cit)->GetStarts().front() : (*cit)->GetStarts().back();
00629                 starts.push_back(start);
00630                 count++;
00631             }
00632         }
00633     } 
00634     count = (count == nBlocks) ? count: 0;
00635     return count;
00636 }
00637 
00638 bool GetDDSetFromSeqAlign(const CSeq_align& align, const TDendiag*& dd) {
00639     if (align.GetSegs().IsDendiag()) {
00640         dd = &(align.GetSegs().GetDendiag());
00641         return true;
00642     }
00643     return false;
00644 }
00645 
00646 
00647 bool GetDDSetFromSeqAlign(CSeq_align& align, TDendiag*& dd) {
00648     if (align.SetSegs().IsDendiag()) {
00649         dd = &(align.SetSegs().SetDendiag());
00650         return true;
00651     }
00652     return false;
00653 }
00654 
00655 
00656 bool GetFirstOrLastDenDiag(const CRef< CSeq_align >& seqAlign, bool First, CRef< CDense_diag >& DenDiag) {
00657 //-------------------------------------------------------------------------
00658 // get either the first or last dense-diag of the seqAlign
00659 //-------------------------------------------------------------------------
00660   const TDendiag* pDenDiagSet;                     // (TDendiag = list<CRef<CDense_diag>>)
00661   TDendiag_cit k;
00662 
00663   if (seqAlign.NotEmpty() && GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00664 
00665     if (First) {
00666       k = pDenDiagSet->begin();
00667     }
00668     else {
00669       k = pDenDiagSet->end();
00670       k--;
00671     }
00672     DenDiag = (*k);
00673     return(true);
00674   }
00675   return(false);
00676 }
00677 
00678 bool CheckSeqIdInDD(const CRef< CSeq_align >& seqAlign)
00679 {
00680     int iii;
00681     const TDendiag* pDenDiagSet;     // (TDendiag = list<CRef<CDense_diag>>)
00682     TDendiag_cit k;
00683     CDense_diag::TIds IdsSet;
00684     CDense_diag::TIds::iterator i;
00685     CRef< CSeq_id > master, slave, master2, slave2;
00686     if (seqAlign.NotEmpty() && GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) 
00687     {
00688         iii=0;
00689         k = pDenDiagSet->begin();
00690         IdsSet = (*k)->GetIds();
00691         i = IdsSet.begin();
00692         master = *i;
00693         i++;
00694         slave = *i;
00695         k++;iii++;
00696         for (; k != pDenDiagSet->end(); k++, iii++)
00697         {
00698             IdsSet = (*k)->GetIds();
00699             i = IdsSet.begin();
00700             master2 = *i;
00701             i++;
00702             slave2 = *i;
00703             if (!(SeqIdsMatch(master, master2)) || !SeqIdsMatch(slave, slave2))
00704                 return false;
00705         }
00706     }
00707     return true;
00708 }
00709 
00710 /*
00711 _/_/_/_/_/_/_/_/_/_/_/_/_/_/
00712 _/
00713 _/ DD<->SeqLoc transfer Functions
00714 _/
00715 _/_/_/_/_/_/_/_/_/_/_/_/_/_/
00716 */
00717 
00718 
00719 void MakeDDFromSeqLoc(CSeq_loc * pAl,TDendiag * pDD ) {
00720         int from, to;
00721 
00722         if (!pAl) return;
00723         // make a DD from AlignAnnot
00724         //if (pAl->GetLocation().IsInt()) {
00725         if (pDD && pAl->IsInt()) {
00726                 //CSeq_interval& interval = pAl->SetLocation().SetInt();
00727                 CSeq_interval& interval = pAl->SetInt();
00728                 from=interval.SetFrom();
00729                 to=interval.SetTo();
00730                 CRef< CSeq_id > RefID(new CSeq_id);
00731                 RefID = &interval.SetId();
00732                 AddIntervalToDD(pDD,RefID,RefID,from,from,to-from+1);
00733         //}  else if( pAl->GetLocation().IsPacked_int() ) {
00734         }  else if(pDD && pAl->IsPacked_int() ) {
00735                 CPacked_seqint::Tdata::iterator s;        
00736                 for (s=pAl->SetPacked_int().Set().begin(); s!=pAl->SetPacked_int().Set().end(); s++) {
00737                         //CSeq_interval& interval = (*s);
00738                         from=(*s)->GetFrom();
00739                         to=(*s)->GetTo();
00740                         CRef< CSeq_id > RefID(new CSeq_id);
00741                         RefID = &((*s)->SetId());
00742                         AddIntervalToDD(pDD,RefID,RefID ,from,from,to-from+1);
00743                 }
00744         }
00745 }
00746 
00747 
00748 void MakeSeqLocFromDD(const TDendiag * pDD, CSeq_loc * pAl) {
00749         TDendiag_cit  pp;
00750         int iDst;
00751         CDense_diag::TStarts::const_iterator  pos;
00752         vector < CRef< CSeq_id > >::const_iterator pid;
00753 
00754         for (iDst=0,pp=pDD->begin(); pp!=pDD->end(); pp++,iDst++){
00755                 pos=(*pp)->GetStarts().begin();
00756                 TSeqPos len=((*pp)->GetLen());
00757                 TSeqPos posStart=*pos;
00758                 pid=(*pp)->GetIds().begin();
00759                 //CRef<CSeq_id> SeqID=*(++pid);
00760                 CRef<CSeq_id> SeqID=*(pid);
00761                 
00762                 if(pDD->size()==1){
00763                         pAl->SetInt().SetFrom(posStart);
00764                         pAl->SetInt().SetTo(posStart+len-1);
00765                         pAl->SetInt().SetId(*SeqID);
00766                 }else {
00767                         //CSeq_interval * intrvl = new CSeq_interval();
00768                         //CRef< CSeq_interval > intrvl = new CSeq_interval();
00769                         CRef < CSeq_interval  > intrvl(new CSeq_interval());
00770                         intrvl->SetFrom(posStart);
00771                         intrvl->SetTo(posStart+len-1);
00772                         intrvl->SetId(*SeqID);
00773                         pAl->SetPacked_int().Set().push_back(intrvl);
00774                 }
00775         }
00776 }
00777 
00778 void AddIntervalToDD(TDendiag * pDD,CRef<CSeq_id> seqID1, CRef<CSeq_id> seqID2,TSeqPos st1,TSeqPos st2, TSeqPos lll)
00779 // Fake it 
00780 {
00781         CRef< CSeq_id > idMaster(new CSeq_id);
00782         idMaster.Reset(seqID1);
00783         CRef< CSeq_id > idSeq(new CSeq_id);
00784         idSeq.Reset(seqID2);
00785                 
00786         CRef<CDense_diag> newDD(new CDense_diag);
00787         newDD->SetDim(2);
00788         //newDD->SetIds().push_back(seqID1);
00789         //newDD->SetIds().push_back(seqID2);
00790         newDD->SetIds().push_back(idMaster);
00791         newDD->SetIds().push_back(idSeq);
00792         newDD->SetStarts().push_back(st1);
00793         newDD->SetStarts().push_back(st2);
00794         newDD->SetLen()=lll;
00795         pDD->push_back(newDD); // apend to the DensDiag List
00796 }
00797 
00798 
00799 bool GetDenDiagSet(const CRef< CSeq_annot >& seqAnnot, int Row, const TDendiag*& pDenDiagSet) {
00800 //-------------------------------------------------------------------------
00801 // the same as SetDenDiagSet, but insure that the returned
00802 // den-diag-set is const.
00803 //-------------------------------------------------------------------------
00804 //  TDendiag* pTempDenDiagSet;
00805 //  bool RetVal;
00806 //  RetVal = SetDenDiagSet(seqAnnot, Row, pTempDenDiagSet);
00807 //  pDenDiagSet = pTempDenDiagSet;
00808 //  return(RetVal);
00809     list< CRef< CSeq_align > >::const_iterator j;
00810 
00811     if (seqAnnot->GetData().IsAlign()) {
00812        // figure out which dense-diag set to get (based on Row)
00813        if (Row == 0) j = seqAnnot->GetData().GetAlign().begin();
00814        else {
00815          int Count = 0;
00816          for (j= seqAnnot->GetData().GetAlign().begin();
00817               j!= seqAnnot->GetData().GetAlign().end(); j++) {
00818            if (++Count == Row) break;
00819          }
00820        }
00821        if ((*j)->GetSegs().IsDendiag()) {
00822          // get the dense-diag set
00823          pDenDiagSet = &((*j)->GetSegs().GetDendiag());
00824          return(true);
00825        }
00826     }
00827     return(false);
00828 }
00829 
00830 bool SetDenDiagSet(CRef< CSeq_annot >& seqAnnot, int Row, TDendiag*& pDenDiagSet) {
00831 //-------------------------------------------------------------------------
00832 // get a set of dense-diag's.  this is dense-diag info for a row.
00833 // for Row = 0, and Row = 1, return the same DenDiagSet.
00834 //-------------------------------------------------------------------------
00835     list< CRef< CSeq_align > >::iterator j;
00836 
00837     if (seqAnnot->GetData().IsAlign()) {
00838        // figure out which dense-diag set to get (based on Row)
00839        if (Row == 0) j = seqAnnot->SetData().SetAlign().begin();
00840        else {
00841          int Count = 0;
00842          for (j= seqAnnot->SetData().SetAlign().begin();
00843               j!= seqAnnot->SetData().SetAlign().end(); j++) {
00844            if (++Count == Row) break;
00845          }
00846        }
00847        if ((*j)->SetSegs().IsDendiag()) {
00848          // get the dense-diag set
00849          pDenDiagSet = &((*j)->SetSegs().SetDendiag());
00850          return(true);
00851        }
00852     }
00853     return(false);
00854 }
00855 
00856 //  Moved from the validator...
00857 void BuildAdjacentDiags(const TDendiag_cit& begin_orig, const TDendiag_cit& end_orig, TDendiag* adj)
00858 {
00859     
00860     //  Go through the dense_diags list and identify all adjacent blocks,
00861     //  namely those with no unaligned residues between them.  In the third
00862     //  argument 'adj', fill the dense list with only non-adjacent dense_diags,
00863     //  merging any adjacent ones found in the original list.
00864     
00865     int start, len, start_adj, len_adj;
00866     int start_slave, start_adj_slave;
00867     bool appended = false;
00868     
00869     CRef<CDense_diag> dd_cref;
00870     TDendiag_cit orig_ci;
00871     TDendiag_it adj_ci;
00872     CDense_diag::TStarts::iterator start_adj_i;
00873     
00874     //  loop over original set of starts on master
00875     for (orig_ci = begin_orig; orig_ci != end_orig; ++orig_ci) {
00876         appended = false;
00877         
00878         start = (*orig_ci)->GetStarts().front();
00879         start_slave = (*orig_ci)->GetStarts().back();
00880         len   = (*orig_ci)->GetLen();
00881         
00882         //  Look at list of new dense_diags and see if any are adjacent to original.
00883         //  Both aligned regions in the dense diag must be collapseable, unless
00884         //  one is validating a new master in the child.
00885         for (adj_ci = adj->begin(); adj_ci != adj->end(); ++adj_ci) {
00886             start_adj = (*adj_ci)->GetStarts().front();
00887             start_adj_slave = (*adj_ci)->GetStarts().back();
00888             len_adj   = (*adj_ci)->GetLen();
00889             
00890             if (start == start_adj + len_adj && start_slave == start_adj_slave + len_adj) {
00891                 // append *orig_ci range to *adj_ci; starts remain unchanged
00892                 (*adj_ci)->SetLen(len + len_adj);
00893                 appended = true;
00894             } else if (start + len == start_adj && start_slave + len == start_adj_slave) {
00895                 
00896                 // prepend *orig_ci range to *adj_ci; need to update all starts
00897                 for (start_adj_i  = (*adj_ci)->SetStarts().begin();
00898                 start_adj_i != (*adj_ci)->SetStarts().end(); ++start_adj_i) {
00899                     *start_adj_i -= len;                    
00900                 }
00901                 (*adj_ci)->SetLen(len + len_adj);
00902                 appended = true;
00903             }
00904             
00905         }
00906         if (!appended) {
00907             dd_cref = new CDense_diag();
00908             dd_cref->Assign(**orig_ci);
00909             adj->push_back(dd_cref);
00910         }
00911     }
00912 }
00913 
00914 
00915 bool EraseRow(CRef< CSeq_annot >& seqAnnot, int RowIndex) {
00916 //-------------------------------------------------------------------------
00917 // Erase the RowIndex-1 seq-align.  don't erase RowIndex 0.
00918 //-------------------------------------------------------------------------
00919     list< CRef< CSeq_align > >::iterator j, jend;
00920     int  RowCount;
00921 
00922     if (RowIndex == 0) return(false);
00923 
00924     if (seqAnnot->GetData().IsAlign()) {
00925         RowCount = 1;
00926         jend = seqAnnot->SetData().SetAlign().end();
00927         for (j= seqAnnot->SetData().SetAlign().begin(); j != jend; j++) {
00928             if (RowCount == RowIndex) {
00929                 seqAnnot->SetData().SetAlign().erase(j);
00930                 return(true);
00931             }
00932             RowCount++;
00933             if (RowCount > RowIndex) break;
00934         }
00935     }
00936     return(false);
00937 }
00938 
00939 //input seqAlign may actually contain CSeq_align_set
00940 CRef< CSeq_align > ExtractFirstSeqAlign(CRef< CSeq_align > seqAlign)
00941 {
00942     if (seqAlign.Empty())
00943         return seqAlign;
00944     if (!seqAlign->GetSegs().IsDisc())
00945         return seqAlign;
00946     if (seqAlign->GetSegs().GetDisc().CanGet())
00947     {
00948         const list< CRef< CSeq_align > >& saList = seqAlign->GetSegs().GetDisc().Get();
00949         if (saList.begin() != saList.end())
00950             return ExtractFirstSeqAlign(*saList.begin());
00951     }
00952     CRef< CSeq_align > nullRef;
00953     return nullRef;
00954 }
00955 
00956 
00957 int ddLen(TDendiag * pDD)
00958 {
00959         TDendiag_cit  pp;
00960         int staLen=0;
00961         
00962         for (pp=pDD->begin(); pp!=pDD->end(); pp++) 
00963         {
00964             staLen+=((*pp)->GetLen());
00965         }
00966         
00967         return staLen;
00968 }
00969 
00970 
00971 string ddAlignInfo(TDendiag * pGuideDD)
00972 {
00973         TDendiag_cit  ppGuide;
00974         int iDst;
00975         CDense_diag::TStarts::const_iterator  pos;
00976         vector < CRef< CSeq_id > >::const_iterator pid;
00977         string ret="";
00978         char buf[1024];
00979 
00980         for (iDst=0,ppGuide=pGuideDD->begin(); ppGuide!=pGuideDD->end(); ppGuide++,iDst++){
00981                 pos=(*ppGuide)->GetStarts().begin();
00982                 TSeqPos lenGuide=((*ppGuide)->GetLen());
00983                 TSeqPos posMasterGuide=*pos;
00984                 TSeqPos posSeqGuide=*(++pos);
00985 
00986                 pid=(*ppGuide)->GetIds().begin();
00987                 CRef<CSeq_id> GuideMasterSeqID=*(pid);
00988                 CRef<CSeq_id> GuideSeqID=*(++pid);
00989                 
00990                 sprintf(buf,"[%s]/[%s](%d)  ",GetSeqIDStr(GuideMasterSeqID).c_str(),GetSeqIDStr(GuideSeqID).c_str(),(int)pGuideDD->size());
00991                 if(!iDst){
00992                     ret+=buf;
00993                 }
00994                 sprintf(buf,"#%d=[%d-%d]/[%d-%d](%d) ",iDst,posMasterGuide+1,posMasterGuide+lenGuide,posSeqGuide+1,posSeqGuide+lenGuide,lenGuide);
00995                 ret+=buf;
00996         }
00997         return ret;
00998 }
00999 
01000 int ddRecompose(TDendiag * pGuideDD,int iMaster, int iSeq,TDendiag * pResultDD)
01001 {
01002     TDendiag_it  ppGuide;
01003     int iDst;
01004     CDense_diag::TStarts::iterator  pos,ppos;
01005     vector < CRef< CSeq_id > >::iterator pid,ppid;
01006 
01007     for (iDst=0,ppGuide=pGuideDD->begin(); ppGuide!=pGuideDD->end(); ppGuide++,iDst++){
01008             ppos=pos=(*ppGuide)->SetStarts().begin();
01009             TSeqPos lenGuide=((*ppGuide)->GetLen());
01010             TSeqPos posMasterGuide=*pos;
01011             TSeqPos posSeqGuide=*(++pos);
01012             // exchange starts
01013             //*ppos=posSeqGuide;
01014             //*pos=posMasterGuide; 
01015 
01016             ppid=pid=(*ppGuide)->SetIds().begin();
01017             CRef<CSeq_id> GuideMasterSeqID=*(pid);
01018             CRef<CSeq_id> GuideSeqID=*(++pid);
01019             // exchage ids
01020             //*ppid=GuideSeqID;
01021             //*pid=GuideMasterSeqID;
01022             
01023             AddIntervalToDD(pResultDD,iMaster==0 ? GuideMasterSeqID : GuideSeqID , iSeq==0 ? GuideMasterSeqID : GuideSeqID ,iMaster==0 ? posMasterGuide : posSeqGuide ,iSeq==0 ? posMasterGuide : posSeqGuide , lenGuide);
01024     }
01025     return iDst;
01026 }
01027 
01028 int ddRenameSeqID(TDendiag * pGuideDD,int iNum, CRef< CSeq_id > & seqID)
01029 {
01030     TDendiag_it  ppGuide;
01031     int iDst;
01032     vector < CRef< CSeq_id > >::iterator pid;
01033 
01034     for (iDst=0,ppGuide=pGuideDD->begin(); ppGuide!=pGuideDD->end(); ppGuide++,iDst++){
01035             
01036             CRef< CSeq_id > idCopy(new CSeq_id);
01037             idCopy.Reset(seqID);
01038 
01039             pid=(*ppGuide)->SetIds().begin();
01040             if(iNum)++pid;
01041             *(pid)=idCopy;
01042     }
01043     return iDst;
01044 }
01045 
01046 
01047 bool ddAreEquivalent(const TDendiag * pDD1, const TDendiag * pDD2, bool checkMasters)
01048 {
01049     TDendiag_cit  pp1,pp2;
01050     CDense_diag::TStarts::const_iterator  pos1,pos2;
01051     vector < CRef< CSeq_id > >::const_iterator pid1,pid2;
01052     bool isSimilar=true;
01053     
01054     if(pDD1->size()!=pDD2->size())
01055             return false;
01056 
01057     for (pp1=pDD1->begin(),pp2=pDD2->begin(); pp1!=pDD1->end() && pp2!=pDD2->end(); pp1++,pp2++){
01058         pos1=(*pp1)->GetStarts().begin();
01059         TSeqPos lenGuide1=((*pp1)->GetLen());
01060         TSeqPos posMasterGuide1=*pos1;
01061         TSeqPos posSeqGuide1=*(++pos1);
01062         pid1=(*pp1)->GetIds().begin();
01063         CRef<CSeq_id> idMas1=*(pid1);
01064         CRef<CSeq_id> idSlv1=*(++pid1);
01065                 
01066         pos2=(*pp2)->GetStarts().begin();
01067         TSeqPos lenGuide2=((*pp2)->GetLen());
01068         TSeqPos posMasterGuide2=*pos2;
01069         TSeqPos posSeqGuide2=*(++pos2);
01070         pid2=(*pp2)->GetIds().begin();
01071         CRef<CSeq_id> idMas2=*(pid2);
01072         CRef<CSeq_id> idSlv2=*(++pid2);
01073         
01074         
01075         if( !SeqIdsMatch(idSlv1, idSlv2) ||
01076             lenGuide1!=lenGuide2 || 
01077             posMasterGuide1!=posMasterGuide2 || 
01078             posSeqGuide1!=posSeqGuide2){
01079             isSimilar=false;
01080             break;
01081         }
01082 
01083         if (checkMasters && !SeqIdsMatch(idMas1, idMas2)) {
01084             isSimilar=false;
01085             break;
01086         }
01087     }
01088     return isSimilar;
01089 }
01090 
01091     
01092 
01093 
01094 
01095 
01096 /*
01097 _/_/_/_/_/_/_/_/_/_/_/_/_/_/
01098 _/
01099 _/ Alignment remapping functions
01100 _/
01101 _/_/_/_/_/_/_/_/_/_/_/_/_/_/
01102 */
01103 typedef struct {
01104         int hits;
01105         int crdSeq[4];
01106         CRef< CSeq_id > seqID[4];
01107         int secNum[4];
01108 }ALICORD;
01109 
01110 static int ddAcumAliCord(TDendiag * pDD, int interRow, ALICORD * acL,int seqRow)
01111 {
01112         TDendiag_cit  pp;
01113         vector < CRef< CSeq_id > >::const_iterator pID;
01114         int maxPos=0,i,iSec;
01115         CDense_diag::TStarts::const_iterator  pos;
01116 
01117 
01118         for (iSec=0,pp=pDD->begin(); pp!=pDD->end(); pp++,iSec++) 
01119         {
01120                 pos=(*pp)->GetStarts().begin();
01121                 TSeqPos len=((*pp)->GetLen());
01122                 TSeqPos posSeq=*(pos);
01123                 TSeqPos posInter=*(++pos);
01124 
01125                 pID=(*(pDD->begin()))->GetIds().begin();
01126                 CRef< CSeq_id > idSeq=*(pID);
01127                 CRef< CSeq_id > idInter=*(++pID);
01128 
01129                 if(interRow==0){ // swap to the requested row 
01130                         TSeqPos tmp=posSeq;posSeq=posInter;posInter=tmp;
01131                         CRef< CSeq_id > tmi=idSeq;idSeq=idInter;idInter=tmi;
01132                 }
01133                 
01134                 
01135                 if(!acL){ // just get the maximum size - used to allocate the acL bufer in calling function
01136                     if(maxPos < (int) (posInter+len) )maxPos = posInter+len;
01137                 }
01138                 else {
01139                     for(i=posInter;i< (int) (posInter+len);i++){
01140                                 acL[i].hits++;
01141                                 acL[i].crdSeq[0]=i;
01142                                 acL[i].crdSeq[seqRow]=posSeq+(i-posInter);
01143                                 acL[i].seqID[0]=idInter;
01144                                 acL[i].seqID[seqRow]=idSeq;
01145                                 acL[i].secNum[seqRow]=iSec;
01146                                 acL[i].secNum[0]=iSec;
01147                         }
01148                         
01149                 }
01150         }
01151         
01152         return maxPos;
01153 }
01154 
01155 
01156 static int ddScanAliCord(TDendiag * pDDList, ALICORD * acL, int maxLen,int rowMaster,int rowSeq,int iRowFollowStructure,int hitCnt)
01157 {
01158         // restore overlap alignments   
01159         int iCnt=0,i,is,ie;
01160 
01161         for(i=0;i<maxLen;)
01162         {
01163                 // determine  starts andend of overlap block
01164                 if(acL[i].hits!=hitCnt){
01165                     //if(acL[i].hits>hitCnt)return 0; // error , this shouldn't happen
01166                     if(acL[i].hits>3)return 0; // error , this shouldn't happen
01167                     i++;continue;
01168                 }
01169                 is=i;
01170                 //while(acL[i].hits==hitCnt && acL[i].secNum==acL[is].secNum){
01171                 
01172                 while(acL[i].hits==hitCnt){
01173                         if(iRowFollowStructure!=-1){
01174                             if(acL[i].secNum[iRowFollowStructure]!=acL[is].secNum[iRowFollowStructure])
01175                                 break;
01176                         } else {
01177                             if( acL[i].secNum[rowMaster]!=acL[is].secNum[rowMaster] || 
01178                                  acL[i].secNum[rowSeq]!=acL[is].secNum[rowSeq] )
01179                                     break;
01180                         }
01181                         // if the sequence has a gap there 
01182                         if(i>is && acL[i].crdSeq[rowSeq]!=acL[i-1].crdSeq[rowSeq]+1)
01183                             break;
01184                         i++;
01185                 }
01186                 ie=i;
01187                 
01188                 TSeqPos posMasterNew=acL[is].crdSeq[rowMaster];
01189                 TSeqPos posSeqNew=acL[is].crdSeq[rowSeq];
01190                 TSeqPos lenNew=ie-is;
01191                 //CRef< CSeq_id > idMaster=acL[is].seqID[rowMaster];
01192                 //CRef< CSeq_id > idSeq=acL[is].seqID[rowSeq];
01193                 CRef< CSeq_id > idMaster(new CSeq_id);
01194                 idMaster.Reset(acL[is].seqID[rowMaster]);
01195                 CRef< CSeq_id > idSeq(new CSeq_id);
01196                 idSeq.Reset(acL[is].seqID[rowSeq]);
01197                 
01198 
01199                 // fill the new dens Diag block information
01200                 {
01201                         CRef<CDense_diag> newDD(new CDense_diag);
01202                         newDD->SetDim(2);
01203                         newDD->SetIds().push_back(idMaster);
01204                         newDD->SetIds().push_back(idSeq);
01205                         newDD->SetStarts().push_back(posMasterNew);
01206                         newDD->SetStarts().push_back(posSeqNew);
01207                         newDD->SetLen()=lenNew;
01208 
01209                         pDDList->push_back(newDD); // apend to the DensDiag List
01210                         iCnt++;
01211                 }
01212 
01213         }
01214         return iCnt;
01215 }
01216 
01217 int ddRemap(TDendiag * pSrcDD,int iSeq,TDendiag * pGuideDD, int iMaster,TDendiag * newDDlist,int iMasterNew, int iSeqNew,int flags,string err)
01218 {
01219         // determine the length of the intermediate sequence alignment
01220         int maxLen2=ddAcumAliCord(pGuideDD,1-iMaster,0,1);
01221         int maxLen1=ddAcumAliCord(pSrcDD,1-iSeq,0,2);
01222         int maxLen=maxLen1>maxLen2 ? maxLen1 : maxLen2;maxLen++;
01223         int iFollow=-1;
01224 
01225         // allocate the buffer to keep coverage ALICORDs
01226         ALICORD* allArr=(ALICORD * )malloc(sizeof(ALICORD)* maxLen);
01227         if(!allArr) {
01228             err="remapDD error: couldn't allocate enough memory.";
01229             return 0;
01230         }
01231 
01232         memset(allArr,0,maxLen*sizeof(ALICORD));
01233 
01234         // accumulate ALICORDS
01235         //string debug1=ddAlignInfo(pSrcDD);
01236         //  string debug2=ddAlignInfo(pGuideDD);
01237 
01238 
01239         if(flags&DD_FOLLOWGUIDE){
01240             ddAcumAliCord(pSrcDD,1-iSeq,allArr,2);
01241             ddAcumAliCord(pGuideDD,1-iMaster,allArr,1);
01242             iFollow=0;
01243         }else {
01244             ddAcumAliCord(pGuideDD,1-iMaster,allArr,1);
01245             ddAcumAliCord(pSrcDD,1-iSeq,allArr,2);
01246             iFollow=0;
01247         }
01248         // restore overlap alignments   
01249         int iCnt=ddScanAliCord(newDDlist,allArr,maxLen,iMasterNew,iSeqNew,iFollow,2);
01250 
01251         free( (void * )allArr) ;
01252         return iCnt;
01253 }
01254 
01255 string ddDifferenceResidues(TDendiag * pSrcDD,TDendiag * pGuideDD,TDendiag * newDDlist)
01256 {
01257         TDendiag DifferenceDD;
01258         if(!newDDlist)newDDlist=&DifferenceDD;
01259         // determine the length of the intermediate sequence alignment
01260         int maxLen2=ddAcumAliCord(pGuideDD,0,0,1);
01261         int maxLen1=ddAcumAliCord(pSrcDD,0,0,2);
01262         int maxLen=maxLen1>maxLen2 ? maxLen1 : maxLen2;maxLen++;
01263         // allocate the buffer to keep coverage ALICORDs
01264         ALICORD * allArr=(ALICORD * )malloc(sizeof(ALICORD)* maxLen);if(!allArr)return NULL;
01265         memset(allArr,0,maxLen*sizeof(ALICORD));
01266 
01267         ddAcumAliCord(pSrcDD,0,allArr,2);
01268         ddAcumAliCord(pGuideDD,0,allArr,1);
01269 
01270         // restore overlap alignments   
01271         ddScanAliCord(newDDlist,allArr,maxLen,0,0,-1,1);
01272 
01273         free( (void * )allArr) ;
01274         return ddAlignInfo(newDDlist);
01275 }
01276 
01277 
01278 
01279 /*
01280 _/_/_/_/_/_/_/_/_/_/_/_/_/_/
01281 _/
01282 _/ Alignment sscanf/printf functions
01283 _/
01284 _/_/_/_/_/_/_/_/_/_/_/_/_/_/
01285 */
01286 
01287     #define scanTill( v_cond ) while(*ptr && *ptr!='\n' && (v_cond) )
01288 bool sscanSeqId (const char * & ptr,CSeq_id & seqid)
01289 {
01290     char typ[1024], id[1024];
01291     // empty th buffers 
01292     int ityp=0,iid=0;
01293     typ[ityp]=0;id[iid]=0;
01294 
01295     // scan the id 
01296     scanTill( *ptr==' ')ptr++; // skip spaces at the beginning of line 
01297     scanTill( *ptr!=' '){ // get the seqID type 
01298         typ[ityp++]=*ptr;
01299         ptr++;
01300     }typ[ityp]=0;
01301     scanTill( *ptr==' ')ptr++; // skip spaces between gi/pdb type and id itself
01302     scanTill( *ptr!=' '){ // get the seqID type 
01303         id[iid++]=*ptr;
01304         ptr++;
01305     }id[iid]=0;
01306     int gi;
01307     if( !strcmp(typ,"gi") && sscanf(id,"%d",&gi)==1 ){
01308         seqid.SetGi(gi);
01309     }
01310     else if( !strcmp(typ,"pdb") ){
01311         char * ss=strrchr(id,'_');
01312         //if there's a chain specified, separate it
01313         if( ss){
01314             *ss=0;
01315             ss++;
01316             seqid.SetPdb().SetChain(*ss);
01317         } 
01318         seqid.SetPdb().SetMol().Set(id);
01319     }
01320     else return false;
01321     return true;
01322 }
01323 
01324 const char * sscanSeqLocIntervals(const char * ptr, CSeq_loc & sq)
01325 {
01326     CSeq_id * sid=new CSeq_id();
01327     int howmany,from,to;
01328 
01329     //while(ptr && *ptr ){
01330         
01331         sscanSeqId (ptr,*sid);
01332         
01333         // scan each line
01334         scanTill(true){
01335             scanTill( *ptr==' ')ptr++; // skip spaces 
01336             if(!(howmany=sscanf(ptr,"%d-%d",&from,&to)))
01337                 break;
01338             
01339             if (howmany==1)to=from+1;
01340 
01341             CRef < CSeq_interval  > intrvl(new CSeq_interval());
01342             intrvl->SetFrom(from);
01343             intrvl->SetTo(to);
01344             intrvl->SetId(*sid);
01345             sq.SetPacked_int().Set().push_back(intrvl);
01346             scanTill( *ptr!=' ')ptr++; // skip spaces 
01347         }
01348 
01349 //      if(sq.GetPacked_int().Get().size())
01350 //          sl.push_back(sq);
01351 
01352         // next line 
01353         ptr=strchr(ptr,'\n');
01354         if(ptr)ptr++;
01355     //}
01356     return ptr;
01357 }
01358 
01359 //==============================================
01360 //  Query a SeqAlign for e-Values and bit scores
01361 //==============================================
01362 
01363 void ExtractScoreFromSeqAlign(const CRef< CSeq_align >& seqAlign, int flags, vector<double>&  scores) {
01364     ExtractScoreFromSeqAlign(seqAlign.GetPointer(), flags, scores);
01365 }
01366 
01367 void ExtractScoreFromSeqAlign(const CSeq_align* seqAlign, int flags, vector<double>&  scores) {
01368 
01369     int count=0;
01370     TDendiag_cit ddit;
01371 
01372     scores[0] = E_VAL_NOT_FOUND;
01373     scores[1] = SCORE_NOT_FOUND;  // raw score
01374     scores[2] = SCORE_NOT_FOUND;  // bit score
01375     scores[3] = SCORE_NOT_FOUND;  // number identical
01376 
01377     if (!seqAlign) {
01378         return;
01379     }
01380 
01381     if (seqAlign->IsSetScore()) {
01382         // score is at the top level of the seqAlign
01383         count = ExtractScoreFromScoreList(seqAlign->GetScore(), flags, scores);
01384     } else {
01385         // check individual dense-diags for score (this happens when
01386         // SeqAlignConvertDspToDdpList(...) from C-toolkit is called;
01387         // all dense_diags are set w/ the same score ... see make_seg_score())
01388         if (seqAlign->GetSegs().IsDendiag()) {
01389             const TDendiag ddList = seqAlign->GetSegs().GetDendiag();
01390             if (ddList.size() > 0) {
01391                 ddit = ddList.begin();
01392                 while (ddit != ddList.end() && count == 0) {
01393                     if ((*ddit)->IsSetScores()) {
01394                         count = ExtractScoreFromScoreList((*ddit)->GetScores(), flags, scores);
01395                     }
01396                     ++ddit;
01397                 }
01398             }
01399         //  Convert Dense_seg to Dense_diag and check for scores.
01400         //  Couldn't combine as ddList must be non-const here and const for
01401         //  Dense_diag case above.
01402         } else if (seqAlign->GetSegs().IsDenseg()) {
01403             TDendiag ddList;
01404             Denseg2DenseDiagList(seqAlign->GetSegs().GetDenseg(), ddList);
01405             if (ddList.size() > 0) {
01406                 ddit = ddList.begin();
01407                 while (ddit != ddList.end() && count == 0) {
01408                     if ((*ddit)->IsSetScores()) {
01409                         count = ExtractScoreFromScoreList((*ddit)->GetScores(), flags, scores);
01410                     }
01411                     ++ddit;
01412                 }
01413             }
01414         }
01415 
01416     }
01417 }
01418 
01419 
01420 int ExtractScoreFromScoreList(const CSeq_align::TScore& scores, int flags, vector<double>& values) {
01421 
01422     int count = 0;
01423 
01424 
01425     CSeq_align::TScore::const_iterator score_ci, score_ci_end = scores.end();
01426     for (score_ci=scores.begin(); score_ci!=score_ci_end; score_ci++) {
01427         if ((*score_ci)->IsSetId() && (*score_ci)->GetId().IsStr()) {           
01428             if ((flags&E_VALUE) && (*score_ci)->GetValue().IsReal() && (*score_ci)->GetId().GetStr() == "e_value") {
01429                 values[0] = (*score_ci)->GetValue().GetReal();
01430                 count++;
01431             }
01432             if ((flags&RAW_SCORE) && (*score_ci)->GetValue().IsInt() && (*score_ci)->GetId().GetStr() == "score") {
01433                 values[1] = (*score_ci)->GetValue().GetInt();
01434                 count++;
01435             }
01436             if ((flags&BIT_SCORE) && (*score_ci)->GetValue().IsReal() && (*score_ci)->GetId().GetStr() == "bit_score") {
01437                 values[2] = (*score_ci)->GetValue().GetReal();
01438                 count++;
01439             }
01440             if ((flags&N_IDENTICAL) && (*score_ci)->GetValue().IsInt() && (*score_ci)->GetId().GetStr() == "num_ident") {
01441                 values[3] = (*score_ci)->GetValue().GetInt();
01442                 count++;
01443             }
01444         }
01445     }
01446     return count;
01447 }
01448 
01449 //===========================================
01450 //  Functions to manipulate Dense_segs
01451 //===========================================
01452 
01453 CRef<CSeq_align> Denseg2DenseDiagList(const CRef<CSeq_align>& denseSegSeqAlign)
01454 {
01455     CRef<CSeq_align> newSa(new CSeq_align);
01456     newSa->Assign(*denseSegSeqAlign);
01457 
01458     if (denseSegSeqAlign.NotEmpty() && denseSegSeqAlign->GetSegs().IsDenseg()) {
01459         TDendiag ddList;
01460         Denseg2DenseDiagList(denseSegSeqAlign->GetSegs().GetDenseg(), ddList);
01461         newSa->SetSegs().SetDendiag() = ddList;
01462     }
01463 
01464     return newSa;
01465 }
01466 
01467 //  Function written by:  Kamen Todorov, NCBI
01468 //  Part of the objtools/alnmgr project; forked to here to avoid
01469 //  adding extra library dependencies.
01470 
01471 void Denseg2DenseDiagList(const CDense_seg& ds, TDendiag& ddl)
01472 {
01473     const CDense_seg::TIds&     ids     = ds.GetIds();
01474     const CDense_seg::TStarts&  starts  = ds.GetStarts();
01475     const CDense_seg::TStrands& strands = ds.GetStrands();
01476     const CDense_seg::TLens&    lens    = ds.GetLens();
01477     const CDense_seg::TScores&  scores  = ds.GetScores();
01478     const CDense_seg::TNumseg&  numsegs = ds.GetNumseg();
01479     const CDense_seg::TDim&     numrows = ds.GetDim();
01480     int                         total   = numrows * numsegs;
01481     int                         pos     = 0;
01482 
01483     int                         rows_per_seg;
01484 
01485     bool strands_exist = ((int) strands.size() == total);
01486     bool scores_exist = ((int) scores.size() == total);
01487     
01488     for (CDense_seg::TNumseg seg = 0; seg < numsegs; seg++) {
01489         rows_per_seg = 0;
01490         CRef<CDense_diag> dd (new CDense_diag);
01491         dd->SetLen(lens[seg]);
01492         for (CDense_seg::TDim row = 0; row < numrows; row++) {
01493             const TSignedSeqPos& start = starts[pos];
01494             if (start >=0) {
01495                 rows_per_seg++;
01496                 dd->SetIds().push_back(ids[row]);
01497                 dd->SetStarts().push_back(start);
01498                 if (strands_exist) {
01499                     dd->SetStrands().push_back(strands[pos]);
01500                 }
01501                 if (scores_exist) {
01502                     dd->SetScores().push_back(scores[pos]);
01503                 }
01504             }
01505             pos++;
01506         }
01507         if (rows_per_seg >= 2) {
01508             dd->SetDim(rows_per_seg);
01509             ddl.push_back(dd);
01510         }
01511     }
01512 }
01513 // simple and easy : added by Vahan to avoid usage of bunch of algAlignment...  classes
01514 bool GetPendingSeqId(CCdCore * pCD,int irow,CRef <CSeq_id> & seqID)
01515 {
01516     int i ;
01517     list <CRef <CUpdate_align> > ::iterator pPen;
01518     for(i=0,pPen=pCD->SetPending().begin();pPen!=pCD->SetPending().end();pPen++,i++){
01519         if(i<irow)
01520             continue;
01521         CSeq_align * pAl = *((*pPen)->SetSeqannot().SetData().SetAlign().begin());
01522         CDense_diag * pDDPen=*(pAl->SetSegs().SetDendiag().begin());
01523         vector < CRef< CSeq_id > >::const_iterator pid=pDDPen->GetIds().begin();
01524         seqID=*(++pid);
01525         return true;
01526     }
01527     return false;
01528 }
01529 
01530 bool GetPendingFootPrint(CCdCore * pCD,int irow,int * from, int * to)
01531 {
01532     int i ;
01533     list <CRef <CUpdate_align> > ::iterator pPen;
01534     TDendiag_cit pD ;
01535     CDense_diag::TStarts::const_iterator pid;
01536     CRef<CDense_diag > pDDPen;
01537     CSeq_align * pAl ;
01538 
01539     for(i=0,pPen=pCD->SetPending().begin();pPen!=pCD->SetPending().end();pPen++,i++){
01540         if(i<irow)
01541             continue;
01542         pAl = *((*pPen)->SetSeqannot().SetData().SetAlign().begin());
01543         pD= pAl->SetSegs().SetDendiag().begin();
01544         pDDPen=*(pD);
01545         pid=pDDPen->GetStarts().begin();
01546         *from=*(++pid);
01547 
01548         pD= pAl->SetSegs().SetDendiag().end();pD--;
01549         pDDPen=*(pD);
01550         pid=pDDPen->GetStarts().begin();
01551         (*to)=*(++pid);
01552         (*to)+=pDDPen->GetLen()-1;
01553         return true;
01554     }
01555     return false;
01556 }
01557 bool GetPendingDD(CCdCore * pCD,int irow,TDendiag* & pDenDiagSet)
01558 {
01559     int i ;
01560     list <CRef <CUpdate_align> > ::iterator pPen;
01561     for(i=0,pPen=pCD->SetPending().begin();pPen!=pCD->SetPending().end();pPen++,i++){
01562         if(i<irow)
01563             continue;
01564         CSeq_align * pAl = *((*pPen)->SetSeqannot().SetData().SetAlign().begin());
01565         pDenDiagSet=&(pAl->SetSegs().SetDendiag());
01566         
01567         
01568         return true;
01569     }
01570     return false;
01571 }
01572 
01573 //  Assumes that the Seq_align passed is a pairwise (dim = 2) alignment of
01574 //  a sequence to a pssm, where the pssm is the second Id.  Such alignments
01575 //  are obtained via RPSBlast and provided by the CDart API.
01576 int GetPssmIdFromSeqAlign(const CRef<CSeq_align >& seqAlign, string& err) {
01577 
01578     int pssmId = 0;
01579 
01580     err.erase();
01581     if (seqAlign.Empty()) {
01582         err = "GetPssmIdFromSeqAlign:  Empty Seq_align.\n";
01583     } else if (seqAlign->IsSetDim() && seqAlign->GetDim() != 2) {
01584         err = "GetPssmIdFromSeqAlign:  Only Seq_aligns with dim = 2 supported.\n";
01585     } else if (seqAlign->GetSegs().IsDenseg()) {
01586         const CRef< CSeq_id >& pssmSeqId = seqAlign->GetSegs().GetDenseg().GetIds().back();
01587         pssmId = GetCDDPssmIdFromSeqId(pssmSeqId);
01588     } else if (seqAlign->GetSegs().IsDendiag()) {
01589         err = "GetPssmIdFromSeqAlign:  Dense_diags not currently supported.\n";
01590     } else {
01591         err.append("GetPssmIdFromSeqAlign:  Seq_align is an unsupported type (%d).\n", seqAlign->GetType());
01592     }
01593     return pssmId;    
01594 }
01595 
01596 //  Return the GI of the master sequence of the Seq_align.  If not a GI, 
01597 //  or for other error, return 0.
01598 int GetMasterGIFromSeqAlign(const CRef< CSeq_align >& seqAlign, string& err) {
01599 
01600     int gi = 0;
01601 
01602     err.erase();
01603     if (seqAlign.Empty()) {
01604         err = "GetMasterGIFromSeqAlign:  Empty Seq_align.\n";
01605     } else if (seqAlign->GetSegs().IsDenseg()) {
01606         const CRef< CSeq_id >& seqId = seqAlign->GetSegs().GetDenseg().GetIds().front();
01607         if (seqId.NotEmpty() && seqId->IsGi()) {
01608             gi = seqId->GetGi();
01609         } else {
01610             err = "GetMasterGIFromSeqAlign:  Dense_seg's master sequence is empty or not of type 'GI'.\n";
01611         }
01612     } else if (seqAlign->GetSegs().IsDendiag()) {
01613         const CRef< CSeq_id >& seqId = seqAlign->GetSegs().GetDendiag().front()->GetIds().front();
01614         if (seqId.NotEmpty() && seqId->IsGi()) {
01615             gi = seqId->GetGi();
01616         } else {
01617             err = "GetMasterGIFromSeqAlign:  Dense_diag's master sequence is empty or not of type 'GI'.\n";
01618         }
01619     } else {
01620         err.append("GetMasterGIFromSeqAlign:  Seq_align is an unsupported type (%d).\n", seqAlign->GetType());
01621     }
01622     return gi;    
01623 }
01624 
01625 
01626 END_SCOPE(cd_utils) // namespace ncbi::objects::
01627 END_NCBI_SCOPE
01628 
01629 

Generated on Wed Dec 9 04:01:32 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:17:49 2009 by modify_doxy.py rev. 173732