NCBI C++ ToolKit
cuBlock.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /* $Id: cuBlock.cpp 47994 2010-11-23 17:00:06Z gouriano $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  */
00024 
00025 #include <ncbi_pch.hpp>
00026 #include <algo/structure/cd_utils/cuBlock.hpp>
00027 #include <algo/structure/cd_utils/cuAlign.hpp>
00028 #include <algo/structure/cd_utils/cuSequence.hpp>
00029 #include <algo/structure/cd_utils/cuUtils.hpp>
00030 
00031 BEGIN_NCBI_SCOPE
00032 BEGIN_SCOPE(cd_utils)
00033 
00034 //Block //////////////////////////////////
00035 
00036 Block::Block (int start, int len, int id)
00037     :m_len(len), m_start(start), m_id(id)
00038 {
00039 }
00040 
00041 Block::Block (int start, int len)
00042     :m_len(len), m_start(start), m_id(-1)
00043 {
00044 }
00045 
00046 Block::Block ()
00047     :m_len(-1), m_start(-1), m_id(-1)
00048 {}
00049 
00050 Block::Block(const Block& rhs)
00051 :m_len(rhs.m_len), m_start(rhs.m_start), m_id(rhs.m_id)
00052 {
00053 }
00054 
00055 const Block& Block::operator=(const Block& rhs)
00056 {
00057     m_start = rhs.m_start;
00058     m_len = rhs.m_len;
00059     m_id = rhs.m_id;
00060     return *this;
00061 }
00062 
00063 bool Block::operator==(const Block& rhs) const
00064 {
00065     return (m_start == rhs.m_start) && (m_len == rhs.m_len);
00066 }
00067 
00068 bool Block::operator!=(const Block& rhs) const
00069 {
00070     return !((*this) == rhs);
00071 }
00072 
00073 bool Block::contain(const Block& rhs) const
00074 {
00075     return (m_start <= rhs.getStart()) && (getEnd() >= rhs.getEnd());
00076 }
00077 
00078 bool Block::isIntersecting(const Block& rhs)const 
00079 {
00080     int x0 = m_start;
00081     int x1 = getEnd();
00082     int y0 = rhs.m_start;
00083     int y1 = rhs.getEnd();
00084     for (int y = y0; y <= y1; y++)
00085     {
00086         if ( y >= x0 && y <= x1)
00087             return true;
00088     }
00089     return false;
00090 }
00091 
00092 //shrink rhs to the intersection of rhs with this 
00093 bool Block::intersect(Block& rhs)const 
00094 {
00095     int x0 = m_start;
00096     int x1 = getEnd();
00097     int y0 = rhs.m_start;
00098     int y1 = rhs.getEnd();
00099     int yStart = -1;
00100     //int yEnd = -1;
00101     bool intersected = false;
00102     int y = y0;
00103     for (; y <= y1; y++)
00104     {
00105         if (!intersected)
00106         {
00107             if ( y >= x0 && y <= x1)
00108             {
00109                 intersected = true;
00110                 yStart = y;
00111             }
00112         }
00113         else
00114         {
00115             if ( y < x0 || y > x1)
00116             {
00117                 //yEnd = y - 1;
00118                 break;
00119             }
00120         }
00121     }
00122     if (intersected)
00123     {
00124         rhs.m_start = yStart;
00125         rhs.setEnd(y-1);
00126     }
00127     return intersected;
00128 }
00129 
00130 //return this + deltaBlock
00131 //Block Block::applyDelta(const DeltaBlock& delta) const
00132 Block Block::operator+(const DeltaBlock& delta) const
00133 {
00134     return Block(m_start + delta.deltaStart, m_len + delta.deltaLen, delta.subjectBlockID);
00135 }
00136 
00137 /* DeltaBlock:
00138     int subjectBlockID;
00139     int objectBlockID;
00140     int deltaStart;
00141     int deltaLen;
00142 */
00143 //return this-object
00144 //DeltaBlock Block::getDelta (const Block& object) const
00145 DeltaBlock Block::operator-(const Block& object)const
00146 {
00147     DeltaBlock delta = {m_id, object.m_id, m_start - object.m_start, m_len - object.m_len};
00148     return delta;
00149 }
00150 
00151 Block Block::extend (int nExt, int cExt) const
00152 {
00153     int start = m_start + nExt;
00154     int len = m_len + cExt - nExt;
00155     return Block(start, len, m_id);
00156 }
00157 
00158 void Block::extendSelf (int nExt, int cExt)
00159 {
00160     m_start = m_start + nExt;
00161     m_len = m_len + cExt - nExt;
00162 }
00163 
00164 void Block::addOffset(int nExt)
00165 {
00166     m_start = m_start + nExt;
00167 }
00168 
00169 bool Block::concatenate(const SortedBlocks& blocks, Block& comBlock)
00170 {
00171     if (blocks.size() ==0)
00172         return false;
00173     SortedBlocks::const_iterator sit = blocks.begin();
00174     comBlock.m_id = sit->m_id;
00175     comBlock.m_start = sit->m_start;
00176     comBlock.m_len = sit->m_len;
00177     sit++;
00178     for (; sit != blocks.end(); ++sit)
00179     {
00180         if( (comBlock.getEnd() + 1) != sit->m_start)
00181             //there is a gap between two adjacent blocks, can't join them
00182             return false;
00183         else
00184             comBlock.m_len += sit->m_len;
00185     }
00186     return true;
00187 }
00188 
00189 //BlockModel-------------------------------------------------
00190 
00191 BlockModel::BlockModel() 
00192     : m_blocks(), m_seqId()
00193 {
00194 }
00195 
00196 BlockModel::BlockModel(const CRef< CSeq_align> seqAlign, bool forSlave) 
00197     : m_blocks(), m_seqId()
00198 {
00199     GetSeqID(seqAlign, m_seqId, forSlave);
00200     vector<int> lens, starts;
00201     GetBlockLengths(seqAlign, lens);
00202     GetBlockStarts(seqAlign, starts, !forSlave);
00203     assert(lens.size() == starts.size());
00204     for (int i = 0; i < lens.size(); i++)
00205     {
00206         m_blocks.push_back(Block(starts[i], lens[i], i));
00207     }
00208 }
00209 
00210 BlockModel::BlockModel(CRef< CSeq_id > seqId, bool withOneBlock)
00211     : m_seqId(seqId)
00212 {
00213     if (withOneBlock)
00214     {
00215         Block block(0,1,0);
00216         m_blocks.push_back(block);
00217     }
00218 }
00219 
00220 BlockModel::BlockModel(const BlockModel& rhs)
00221     : m_blocks(rhs.m_blocks), m_seqId(rhs.m_seqId) 
00222 {
00223 }
00224 
00225 void BlockModel::addBlock(Block& block)
00226 {
00227     block.setId(m_blocks.size());
00228     m_blocks.push_back(block);
00229 }
00230 
00231 BlockModel& BlockModel::operator=(const BlockModel& rhs)
00232 {
00233     m_seqId = rhs.m_seqId;
00234     m_blocks.assign(rhs.m_blocks.begin(), rhs.m_blocks.end());
00235     return *this;
00236 }
00237 
00238 bool BlockModel::isAlike(const BlockModel& rhs) const
00239 {
00240     if( SeqIdsMatch(m_seqId, rhs.m_seqId) &&( m_blocks.size() == rhs.m_blocks.size()) )
00241         return true;
00242     else
00243         return false;
00244 }
00245 
00246 
00247 bool BlockModel::operator==(const BlockModel& rhs) const
00248 {
00249     if (!isAlike(rhs))
00250         return false;
00251     for ( int i = 0; i < m_blocks.size(); i++)
00252     {
00253         if (m_blocks[i] != rhs.m_blocks[i])
00254             return false;
00255     }
00256     return true;
00257 }
00258 
00259 bool BlockModel::blockMatch(const BlockModel& rhs) const
00260 {
00261     if (m_blocks.size() != rhs.m_blocks.size())
00262         return false;
00263     for ( int i = 0; i < m_blocks.size(); i++)
00264     {
00265         if (m_blocks[i].getLen() != rhs.m_blocks[i].getLen())
00266             return false;
00267     }
00268     return true;
00269 }
00270 
00271 bool BlockModel::completeModelExtendsIntoUnallowedGappedRegion(const BlockModel& completeModel, int sequenceLength, const vector<int>* commonBlockExt) const 
00272 {
00273     unsigned int nBlocks = m_blocks.size(), commonBlockExtSize = (commonBlockExt) ? commonBlockExt->size() : 0;
00274     unsigned int i, j, k;
00275     int seqLen = -1;
00276     int thisStart, thisLen, thisContiguousLen;
00277     int commonNTermBlockZeroExt, nTermShift;
00278     int completeBlockStart, completeBlockLen;
00279     int prevCTermBlockExt, allowedCTermBlockExt, lastCTermBlockExt;
00280     int blockNumberOnThisOfCompleteBlockStart;
00281     vector<int> commonCTermBlockExt, gapSizeAfterBlockInThis;
00282     bool useInputBlockExtensions = (commonBlockExt && commonBlockExtSize == nBlocks + 1);
00283     bool completeExtendsIntoGappedRegionInThis = false;
00284     BlockModel  thisCopy(*this);               //  so can use non-const methods
00285     BlockModel  completeCopy(completeModel);   //  so can use non-const methods
00286 
00287 //    string thisRowBlockStr = toString();
00288 //    string completeModelStr = completeModel.toString();
00289 
00290     commonNTermBlockZeroExt = (useInputBlockExtensions) ? (*commonBlockExt)[0] : 0;
00291     for (i = 0; i < nBlocks; ++i) 
00292     {
00293         if (i == nBlocks - 1) {
00294             seqLen = sequenceLength;
00295         }
00296         gapSizeAfterBlockInThis.push_back(getGapToCTerminal(i, seqLen));
00297         commonCTermBlockExt.push_back((useInputBlockExtensions) ? (*commonBlockExt)[i+1] : 0);
00298     }
00299 
00300     //  Make sure that there are enough residues for the complete model to 
00301     //  not introduce a shift into the mapped row.  Require that each complete block
00302     //  length not extend into more gaps than are common to all rows in the original block model.
00303     prevCTermBlockExt = 0;
00304     for (j = 0; j < completeCopy.getBlocks().size() && !completeExtendsIntoGappedRegionInThis; ++j) {
00305         completeBlockStart = completeCopy.getBlock(j).getStart();
00306         completeBlockLen   = completeCopy.getBlock(j).getLen();
00307         blockNumberOnThisOfCompleteBlockStart = thisCopy.getBlockNumber(completeBlockStart);
00308 
00309         //  If the completeBlock starts on an unaligned residue of this, getBlockNumber returns -1.
00310         //  In that case, find first aligned residue in the complete block and how many residues into
00311         //  N-terminal gap are needed for the mapping.
00312         nTermShift = 0;
00313         while (blockNumberOnThisOfCompleteBlockStart < 0 && nTermShift+1 < completeBlockLen) {
00314             ++nTermShift;
00315             blockNumberOnThisOfCompleteBlockStart = thisCopy.getBlockNumber(completeBlockStart + nTermShift);
00316         }
00317         //  make sure didn't previously use all residues on C-term extension; if not enough
00318         //  to do N-terminal extension, set nTermShift to zero.
00319         if (nTermShift > 0 && blockNumberOnThisOfCompleteBlockStart >= 0) {
00320             if (blockNumberOnThisOfCompleteBlockStart == 0) {
00321                 if (commonNTermBlockZeroExt - nTermShift < 0) {
00322                     nTermShift = 0;
00323                 }
00324             } else {
00325                 if (gapSizeAfterBlockInThis[blockNumberOnThisOfCompleteBlockStart - 1] - prevCTermBlockExt < nTermShift) {
00326                     nTermShift = 0;
00327                 }
00328             }
00329         } else if (blockNumberOnThisOfCompleteBlockStart < 0) {
00330             nTermShift = 0;
00331         }
00332 
00333         allowedCTermBlockExt = commonCTermBlockExt[blockNumberOnThisOfCompleteBlockStart];
00334 
00335         thisStart = thisCopy.getBlock(blockNumberOnThisOfCompleteBlockStart).getStart();
00336         thisLen   = thisCopy.getBlock(blockNumberOnThisOfCompleteBlockStart).getLen();
00337 
00338         /*
00339         //  There's no gap after this block; find next gap and total number of residues to it;
00340         //  an adjacent block is only possible if there is no allowed extension in the block.
00341         if (allowedCTermBlockExt == 0) {
00342             k = (unsigned) blockNumberOnThisOfCompleteBlockStart; 
00343             while (k + 1 < gapSizeAfterBlockInThis.size() && gapSizeAfterBlockInThis[k] == 0) {
00344                 ++k;
00345                 thisLen += thisCopy.getBlock(k).getLen();
00346             }
00347         }
00348         */
00349 
00350         //  If we've determined there aren't enough N-terminal residues to pad out this to 
00351         //  conform to the complete model, we've done an illegal extension into gapped region.
00352         if (thisStart - completeBlockStart > nTermShift) {
00353             completeExtendsIntoGappedRegionInThis = true;
00354         }
00355 
00356         //  There are enough aligned residues from the start position on child to 
00357         //  grow the block completely w/o extending into disallowed gap regions.
00358         else if (thisLen + allowedCTermBlockExt - (completeBlockStart - thisStart) >= completeBlockLen) {
00359             prevCTermBlockExt = completeBlockLen - thisLen + (completeBlockStart - thisStart);
00360             if (prevCTermBlockExt < 0) prevCTermBlockExt = 0;
00361 //            prevCTermBlockExt = allowedCTermBlockExt;
00362         } 
00363         else {
00364 
00365             //  When complete block is large and covers several blocks in this, see if
00366             //  there is a long enough contiguous length so can safely map all blocks
00367             //  into the single large one.  Stops being contiguous when any gap in this is
00368             //  larger than the specified common gap.
00369             k = (unsigned) blockNumberOnThisOfCompleteBlockStart; 
00370             thisContiguousLen = thisLen + allowedCTermBlockExt;
00371             lastCTermBlockExt = allowedCTermBlockExt;
00372             while (k + 1 < gapSizeAfterBlockInThis.size() && gapSizeAfterBlockInThis[k] == commonCTermBlockExt[k]) {
00373                 ++k;
00374                 lastCTermBlockExt = gapSizeAfterBlockInThis[k];
00375                 thisContiguousLen += thisCopy.getBlock(k).getLen() + lastCTermBlockExt;
00376             }
00377 
00378 
00379             if (thisContiguousLen - (completeBlockStart - thisStart) >= completeBlockLen) {
00380                 //  this is case where have to extend a contiguous range in this to cover complete block;
00381                 //  since it's contiguous, only need an n-terminal extension in next block if used the
00382                 //  final gap in the contiguous length; otherwise, prevCTermBlockExt set to zero.
00383                 //            prevCTermBlockExt = completeBlockLen - thisContiguousLen + (completeBlockStart - thisStart);
00384                 prevCTermBlockExt = thisContiguousLen - completeBlockLen - (completeBlockStart - thisStart);
00385                 if (prevCTermBlockExt > lastCTermBlockExt || prevCTermBlockExt < 0) {
00386                     prevCTermBlockExt = 0;
00387                 }
00388             } else {
00389                 completeExtendsIntoGappedRegionInThis = true;
00390             }
00391         }
00392 
00393     }
00394 
00395     return completeExtendsIntoGappedRegionInThis;
00396 }
00397 
00398 bool BlockModel::contain(const BlockModel& rhs) const
00399 {
00400     if (!isAlike(rhs))
00401         return false;
00402     for ( int i = 0; i < m_blocks.size(); i++)
00403     {
00404         if (!m_blocks[i].contain(rhs.m_blocks[i]))
00405             return false;
00406     }
00407     return true;
00408 }
00409 //delta = bm - this
00410 //bool BlockModel::getCastingDelta(const BlockModel& bm, DeltaBlockModel& delta) const
00411 //return (this-delta), status).  status = true if complete
00412 //complete means every block in this should be accounted for for at least once
00413 pair<DeltaBlockModel*, bool> BlockModel::operator-(const BlockModel& bm) const
00414 {
00415     DeltaBlockModel* delta = new DeltaBlockModel();
00416     set<DeltaBlock> uniqueDelta;
00417     for (int i = 0; i < bm.m_blocks.size(); i++)
00418     {
00419         minusOneBlock(bm.m_blocks[i], *delta);
00420     }
00421     for (DeltaBlockModel::iterator dit = delta->begin(); dit != delta->end(); dit++)
00422     {
00423         uniqueDelta.insert(*dit);
00424     }
00425     delta->clear();
00426     for (set<DeltaBlock>::iterator sit = uniqueDelta.begin(); sit != uniqueDelta.end(); sit++)
00427         delta->insert(*sit);
00428     return pair<DeltaBlockModel*, bool>(delta, delta->size() == m_blocks.size());
00429 }
00430 
00431 pair<DeltaBlockModel*, bool> BlockModel::intersect(const BlockModel& bm) const
00432 {
00433     DeltaBlockModel* delta = new DeltaBlockModel();
00434 
00435     for (int i = 0; i < bm.m_blocks.size(); i++)
00436     {
00437         intersectOneBlock(bm.m_blocks[i], *delta);
00438     }
00439     return pair<DeltaBlockModel*, bool>(delta, delta->size() == m_blocks.size());
00440 }
00441 
00442 
00443 //assume delta = AnotherBlockModel - this
00444 /* don't need it now.  may work on this later
00445 void BlockModel::concatenateOverlaps()
00446 {
00447     DeltaBlockModel::iterator dit = delta.begin();
00448     while (dit != delta.end())
00449     {
00450         pair<DeltaBlockModel::iterator, DeltaBlockModel::iterator> range =
00451             delta.equal_range(*dit);
00452         DeltaBlockModel::iterator onePast = range.first;
00453         onePast++;
00454         if (onePast == range.second) 
00455         {
00456             //there is only one DeltaBlock for this subjectBlock
00457             m_blocks[dit->objectBlockID].retrofit(*dit);
00458             dit++;
00459         }
00460         else
00461         {
00462             Block::SortedBlocks blocks;
00463             for (DeltaBlockModel::iterator tmp = range.first; tmp != range.second; tmp++)
00464             {
00465                 blocks.insert(m_blocks[tmp->objectBlockID]);
00466             }
00467             Block comBlock;
00468             if (Block::concatenate(blocks, comBlock))
00469             {
00470                 comBlock.retrofit(*(range.first));
00471                 dit = delta.erase(onePast, range.second);
00472             }
00473             else
00474             {
00475                 for (DeltaBlockModel::iterator tmp = range.first; tmp != range.second; tmp++)
00476                 {
00477                     m_blocks[tmp->objectBlockID].retrofit(*tmp);
00478                 }
00479                 dit = range.second;
00480             }
00481         }
00482     }
00483 }*/
00484 
00485 //return(this + delta, complete)
00486 pair<BlockModel*, bool> BlockModel::operator+(const DeltaBlockModel& delta) const
00487 {
00488     BlockModel* result = new BlockModel();
00489     DeltaBlockModel::const_iterator dt = delta.begin();
00490     result->m_seqId = m_seqId;
00491     int resultBlockID = 0;
00492     for (; dt != delta.end(); ++dt)
00493     {
00494         //const DeltaBlock& db = *dt;
00495         if (dt->objectBlockID < 0 || dt->objectBlockID >= m_blocks.size())
00496         {
00497             delete result;
00498             return pair<BlockModel*, bool>(nullptr, false);
00499         }
00500         const Block& srcBlock = m_blocks[dt->objectBlockID];
00501         Block block = srcBlock + (*dt);
00502         if (block.getLen() > 0 && block.getStart() >=0)
00503             result->m_blocks.push_back(block);
00504         resultBlockID++;
00505     }
00506     //int eb;
00507     return pair<BlockModel*, bool>(result, (result->getBlocks().size() == delta.size()) );
00508 }
00509 
00510 //cast this to target
00511 BlockModel* BlockModel::completeCastTo(const BlockModel& target) const
00512 {
00513     pair<DeltaBlockModel*, bool> deltaStatus = target - (*this);
00514 //    string dbmStr = DeltaBlockModelToString(*deltaStatus.first);
00515     if (!(deltaStatus.second)) //not complete
00516     {
00517         delete deltaStatus.first;
00518         return 0;
00519     }
00520     pair<BlockModel*, bool> bmStatus = (*this) + (*deltaStatus.first);
00521 //    string bmStr = (*bmStatus.first).toString();
00522     delete deltaStatus.first;
00523     if (bmStatus.second)
00524     {
00525         if (target.contain(*(bmStatus.first)))
00526             return bmStatus.first;
00527     }
00528     //all other conditions are considered fail  
00529     delete bmStatus.first;
00530     return 0;
00531     
00532 }
00533 
00534 //this - aBlock
00535 bool BlockModel::minusOneBlock(const Block& aBlock, DeltaBlockModel& delta) const
00536 {
00537     //const Block& myBlock = m_blocks[myBlockNum];
00538     vector<int> blockIds;
00539     findIntersectingBlocks(aBlock, blockIds);
00540     if (blockIds.size() <= 0)
00541         return false;
00542     for (unsigned int j = 0; j < blockIds.size(); j++)
00543     {
00544         delta.insert(m_blocks[blockIds[j]] - aBlock);
00545     }
00546     return true;
00547 }
00548 
00549 //this intersect aBlock
00550 bool BlockModel::intersectOneBlock(const Block& aBlock, DeltaBlockModel& delta) const
00551 {
00552     //const Block& myBlock = m_blocks[myBlockNum];
00553     vector<int> blockIds;
00554     findIntersectingBlocks(aBlock, blockIds);
00555     if (blockIds.size() <= 0)
00556         return false;
00557     for (unsigned int j = 0; j < blockIds.size(); j++)
00558     {
00559         Block intersectedBlock(aBlock);
00560         if (m_blocks[blockIds[j]].intersect(intersectedBlock))
00561             delta.insert(intersectedBlock - aBlock);
00562     }
00563     return true;
00564 }
00565 
00566 void BlockModel::findIntersectingBlocks(const Block& target, vector<int>& result) const
00567 {
00568     for (int i = 0; i < m_blocks.size(); i++)
00569     {
00570         if(target.isIntersecting(m_blocks[i]))
00571             result.push_back(i);
00572     }
00573 }
00574 /*
00575 const BlockModel& BlockModel::operator+=(const BlockModel& delta)
00576 {
00577     if (!isAlike(delta))
00578         return *this;
00579     for ( int i = 0; i < m_blocks.size(); i++)
00580     {
00581         m_blocks[i] += delta.m_blocks[i];
00582     }
00583     return *this;
00584 }
00585 
00586 BlockModel BlockModel::operator-(const BlockModel& rhs)
00587 {
00588     if(!isAlike(rhs))
00589         return *this;
00590     BlockModel delta;
00591     for ( int i = 0; i < m_blocks.size(); i++)
00592     {
00593         delta.m_blocks.push_back(m_blocks[i] - rhs.m_blocks[i]);
00594     }
00595     return delta;
00596 }
00597 */
00598 
00599 CRef<CSeq_align> BlockModel::toSeqAlign(const BlockModel& master) const
00600 {
00601     CRef<CSeq_align> sa;
00602     int eb;
00603     if (!master.isValid(-1, eb))
00604         return sa;
00605     if (!isValid(-1, eb))
00606         return sa;
00607     if (!blockMatch(master))
00608         return sa;
00609     
00610     sa = new CSeq_align();
00611     sa->Reset();
00612     sa->SetType(CSeq_align::eType_partial);
00613     sa->SetDim(2);
00614     TDendiag& ddList = sa->SetSegs().SetDendiag();
00615     for (int i = 0; i < m_blocks.size(); i++)
00616     {
00617         CRef< CDense_diag > dd(new CDense_diag());
00618         dd->SetDim(2);
00619         vector< CRef< CSeq_id > >& seqIds = dd->SetIds();
00620 
00621         //master seqId
00622         CRef< CSeq_id > seqIdMaster = CopySeqId(master.getSeqId());
00623         seqIds.push_back(seqIdMaster);
00624     
00625         //slave seqId
00626         CRef< CSeq_id > seqIdSlave = CopySeqId(getSeqId());
00627         seqIds.push_back(seqIdSlave);
00628         CDense_diag::TStarts& starts = dd->SetStarts();
00629         starts.push_back(master.m_blocks[i].getStart());
00630         starts.push_back(m_blocks[i].getStart());
00631         dd->SetLen(m_blocks[i].getLen());
00632         
00633         ddList.push_back(dd);
00634     }
00635     
00636     return sa;
00637 }
00638 
00639 int BlockModel::getLastAlignedPosition()const
00640 {
00641     const Block& lastBlock = *(m_blocks.rbegin());
00642     return lastBlock.getEnd();
00643 }
00644 
00645 int BlockModel::getFirstAlignedPosition()const
00646 {
00647     const Block& firstBlock = *(m_blocks.begin());
00648     return firstBlock.getStart();
00649 }
00650 
00651 int BlockModel::getTotalBlockLength () const 
00652 {
00653     int len = 0;
00654     for (int i = 0; i < m_blocks.size(); i++)
00655     {
00656         len += m_blocks[i].getLen();
00657     }
00658     return len;
00659 }
00660 
00661 int BlockModel::getGapToNTerminal(int bn) const
00662 {
00663     int gap = 0;
00664     if (bn == 0)
00665         gap = m_blocks[bn].getStart();
00666     else
00667     {
00668         int delta = m_blocks[bn].getStart() - m_blocks[bn - 1].getEnd() - 1;
00669         if (delta >= 0)
00670             gap = delta;
00671     }
00672     return gap;
00673 }
00674 
00675 int BlockModel::getGapToCTerminal(int bn, int len)const 
00676 {
00677     int gap = 0;
00678     if (bn == (m_blocks.size() - 1)) //last blast
00679     {
00680         if (len > 0)
00681             gap = (len - 1) - m_blocks[bn].getEnd();
00682     }
00683     else
00684     {
00685         int delta = m_blocks[bn + 1].getStart() - m_blocks[bn].getEnd() - 1;
00686         if (delta >= 0)
00687             gap = delta;
00688     }
00689     return gap;
00690 }
00691 
00692 void BlockModel::addOffset(int nExt)
00693 {
00694     for (int i = 1; i < m_blocks.size(); i++)
00695     {
00696         m_blocks[i].addOffset(nExt);
00697     }
00698 }
00699 
00700 bool  BlockModel::isValid(int seqLen, int& errBlock) const
00701 {
00702     if (m_blocks.size() == 0)
00703         return false;
00704     if (seqLen > 1 && getLastAlignedPosition() >= seqLen)
00705     {
00706         errBlock = m_blocks.size() - 1;
00707         return false;
00708     }
00709     if (!m_blocks[0].isValid())
00710     {
00711         errBlock = 0;
00712         return false;
00713     }
00714     for (int i = 1; i < m_blocks.size(); i++)
00715     {
00716         if (!m_blocks[i].isValid())
00717         {
00718             errBlock = i;
00719             return false;
00720         }
00721         if (m_blocks[i-1].getEnd() >= m_blocks[i].getStart())
00722         {
00723             errBlock = i -1;
00724             return false;
00725         }
00726     }
00727     return true;
00728 }
00729 
00730 
00731 bool BlockModel::overlap(const BlockModel& bm)const
00732 {
00733     if (!SeqIdsMatch(m_seqId, bm.m_seqId))
00734         return false;
00735     int bmLo = bm.getFirstAlignedPosition();
00736     int bmHi = bm.getLastAlignedPosition();
00737     int lo = getFirstAlignedPosition();
00738     int hi = getLastAlignedPosition();
00739     if (lo <= bmLo)
00740         return hi >= bmLo;
00741     else
00742         return lo <= bmHi;
00743 }
00744 
00745 //return -1 if pos is unaligned
00746 int BlockModel::getBlockNumber(int pos) const
00747 {
00748     int i = 0;
00749     for (; i < m_blocks.size(); i++)
00750     {
00751         if (pos >= m_blocks[i].getStart() && pos <= m_blocks[i].getEnd())
00752             break;
00753     }
00754     if (i >= m_blocks.size())
00755         return -1;
00756     else
00757         return i;
00758 }
00759 
00760 bool BlockModel::mask(const BlockModel& maskBlockModel)
00761 {
00762     bool result = (SeqIdsMatch(getSeqId(), maskBlockModel.getSeqId()));
00763     if (result) {
00764         result = mask(maskBlockModel.getBlocks());
00765     }
00766     return result;
00767 }
00768 
00769 //  Returns true if this object was modified; false if there was no effect.
00770 bool BlockModel::mask(const vector<Block>& maskBlocks)
00771 {
00772     vector<Block> maskedBlocks;
00773     vector<Block>& originalBlocks = getBlocks();
00774     unsigned int i, nOrigBlocks = originalBlocks.size();
00775     unsigned int j, nMaskBlocks = maskBlocks.size();
00776     int origTotalLength = getTotalBlockLength();
00777     int newBlockId = 0;
00778     int pos, start, len;
00779     int origStart, origEnd;
00780     int maskBlockStart, maskBlockEnd, maskFirst, maskLast;
00781     bool hasEffect;
00782 
00783     if (nOrigBlocks == 0 || nMaskBlocks == 0) return false;
00784 
00785     //  Collect all mask positions to simplify search code.
00786     set<int> maskPositions;
00787     set<int>::iterator maskPosEnd;
00788     for (i = 0; i < nMaskBlocks; ++i) {
00789         maskBlockStart = maskBlocks[i].getStart();
00790         maskBlockEnd = maskBlocks[i].getEnd();
00791         for (j = maskBlockStart; j <= maskBlockEnd; ++j) maskPositions.insert(j);
00792     }
00793     maskPosEnd = maskPositions.end();
00794 
00795     maskFirst = maskBlocks[0].getStart();
00796     maskLast = maskBlocks.back().getEnd();
00797 
00798     for (i = 0; i < nOrigBlocks; ++i) {
00799         origStart = originalBlocks[i].getStart();
00800         origEnd = originalBlocks[i].getEnd();
00801 
00802         //  If origBlock does not intersects the maskBlocks footprint, it is unmasked and can be directly copied.
00803         if (origEnd < maskFirst || origStart > maskLast) {
00804             maskedBlocks.push_back(originalBlocks[i]);
00805             maskedBlocks.back().setId(newBlockId);
00806             ++newBlockId;
00807             continue;
00808         }
00809 
00810         start = -1;
00811         len = 0;
00812         for (pos = origStart; pos <= origEnd; ++pos) {
00813 
00814             //  If position is masked; end current block.
00815             if (maskPositions.find(pos) != maskPosEnd) {
00816                 if (len > 0) {
00817                     maskedBlocks.push_back(Block(start, len, newBlockId));
00818                     ++newBlockId;
00819                 }
00820                 len = 0;
00821                 start = -1;
00822             }
00823             else
00824             {
00825                 //  Found the first position in a new block.
00826                 if (len == 0) {
00827                     start = pos;
00828                 }
00829                 ++len;
00830             }
00831 
00832         }  //  end loop on original block positions
00833 
00834         //  Make sure to include the block at the end...
00835         if (len > 0) {
00836             maskedBlocks.push_back(Block(start, len, newBlockId));
00837         }
00838 
00839     }  //  end loop on original blocks
00840 
00841     _ASSERT(getTotalBlockLength() <= origTotalLength);
00842     hasEffect = (getTotalBlockLength() != origTotalLength);
00843     if (hasEffect) {
00844         originalBlocks.clear();
00845         originalBlocks = maskedBlocks;
00846     }
00847 
00848     return hasEffect;
00849 }
00850 
00851 void BlockModel::clipToRange(unsigned int min, unsigned max)
00852 {
00853     unsigned int nBlocks = getBlocks().size();
00854     if (nBlocks == 0) return;
00855 
00856     vector<Block> maskBlocks;
00857     int firstAligned = getBlocks().front().getStart();
00858     int lastAligned = getBlocks().back().getEnd();
00859 
00860     //  Add a masking block for all residues < min.
00861     if (firstAligned < min) {
00862         maskBlocks.push_back(Block(firstAligned, min - firstAligned));
00863     }
00864     //  Add a masking block for all residues > max.
00865     if (lastAligned > max) {
00866         maskBlocks.push_back(Block(max + 1, lastAligned - max));
00867     }
00868 
00869     mask(maskBlocks);
00870 }
00871 
00872 
00873 string BlockModel::toString() const
00874 {
00875     string blockModelStr, tmp;
00876     unsigned int nBlocks = m_blocks.size();
00877 
00878     if (m_seqId.NotEmpty()) {
00879         blockModelStr = "Sequence:  " + GetSeqIDStr(m_seqId) + "\n";
00880     }
00881 
00882     for (unsigned int i = 0; i < nBlocks; ++i) {
00883         tmp  = "  Block Id = " + NStr::IntToString(m_blocks[i].getId());
00884         tmp += "; Block Range = [" + NStr::IntToString(m_blocks[i].getStart());
00885         tmp += ", " + NStr::IntToString(m_blocks[i].getEnd());
00886         tmp += "] (Length = " + NStr::IntToString(m_blocks[i].getLen()) + ")\n";
00887         blockModelStr += tmp;
00888     }
00889     return blockModelStr;
00890 }
00891 
00892 string DeltaBlockModelToString(const DeltaBlockModel& dbm)
00893 {
00894     string deltaBlockModelStr, tmp;
00895     DeltaBlockModel::const_iterator dbm_cit = dbm.begin(), dbm_citEnd = dbm.end();
00896 
00897     for (; dbm_cit != dbm_citEnd; ++dbm_cit) {
00898         tmp  = "  Delta Block Subject Id = " + NStr::IntToString(dbm_cit->subjectBlockID);
00899         tmp += "; Delta Block Object Id = " + NStr::IntToString(dbm_cit->objectBlockID);
00900         tmp += "; Delta Block Start = " + NStr::IntToString(dbm_cit->deltaStart);
00901         tmp += "; Delta Block Len = " + NStr::IntToString(dbm_cit->deltaLen);
00902         tmp += "\n";
00903         deltaBlockModelStr += tmp;
00904     }
00905     return deltaBlockModelStr;
00906 }
00907 
00908 string BlockModel::toString(const BlockModel& bm) {
00909     return bm.toString();
00910 }
00911 
00912 //implement BlockModel Pair
00913 
00914 BlockModelPair::BlockModelPair():
00915 m_master(0), m_slave(0)
00916 {
00917     m_master = new BlockModel();
00918     m_slave = new BlockModel();
00919 }
00920 
00921 BlockModelPair::BlockModelPair(const CRef< CSeq_align> seqAlign)
00922 {
00923     m_master = new BlockModel(seqAlign, false);
00924     m_slave = new BlockModel(seqAlign, true);
00925 }
00926 
00927 //deep copy
00928 BlockModelPair::BlockModelPair(const BlockModelPair& rhs)
00929 {
00930     if (rhs.m_master) {
00931         m_master = new BlockModel(*rhs.m_master);
00932     }
00933     if (rhs.m_slave) {
00934         m_slave = new BlockModel(*rhs.m_slave);
00935     }
00936 }
00937 
00938 BlockModelPair& BlockModelPair::operator=(const BlockModelPair& rhs)
00939 {
00940     //  The copy can fail if rhs has null pointers.
00941     delete m_master;
00942     delete m_slave;
00943     m_master = NULL;
00944     m_slave = NULL;
00945     if (rhs.m_master) {
00946         m_master = new BlockModel(*rhs.m_master);
00947     }
00948     if (rhs.m_slave) {
00949         m_slave = new BlockModel(*rhs.m_slave);
00950     }
00951     return *this;
00952 }
00953 
00954 BlockModelPair::~BlockModelPair()
00955 {
00956     delete m_master;
00957     delete m_slave;
00958 }
00959 
00960 void BlockModelPair::reset()
00961 {
00962     delete m_master;
00963     delete m_slave;
00964     m_master = new BlockModel();
00965     m_slave = new BlockModel();
00966 }
00967 
00968 BlockModel& BlockModelPair::getMaster()
00969 {
00970     return *m_master;
00971 }
00972 
00973 const BlockModel& BlockModelPair::getMaster()const
00974 {
00975     return *m_master;
00976 }
00977 
00978 BlockModel& BlockModelPair::getSlave()
00979 {
00980     return *m_slave;
00981 }
00982 
00983 const BlockModel& BlockModelPair::getSlave()const
00984 {
00985     return *m_slave;
00986 }
00987 
00988 void BlockModelPair::degap()
00989 {
00990     assert(m_master->blockMatch(*m_slave));
00991     int num = m_master->getBlocks().size();
00992     for (int i = 0; i < num; i++)
00993     {
00994             extendMidway(i);
00995     }
00996 }
00997 
00998 void BlockModelPair::extendMidway(int blockNum)
00999 {
01000     int ngap = m_master->getGapToNTerminal(blockNum); 
01001     if (blockNum == 0) //do N-extend the first block
01002         ngap = 0;
01003     int ngapSlave = m_slave->getGapToNTerminal(blockNum);
01004     if (ngap > ngapSlave)
01005         ngap = ngapSlave;
01006     int cgap = m_master->getGapToCTerminal(blockNum);
01007     int cgapSlave = m_slave->getGapToCTerminal(blockNum);
01008     if (cgap > cgapSlave)
01009         cgap = cgapSlave;
01010 
01011     //c-ext of last block has taken half of the gap, so take what's left here
01012     // negative for moving to N-end.
01013     int nExt = -ngap; 
01014     int cExt = 0;
01015     if (blockNum != (m_master->getBlocks().size()-1) ) //do not C-extend the last block
01016     {
01017         if ((cgap % 2) == 0)
01018             cExt = cgap/2;
01019         else
01020             cExt = cgap/2 + 1;
01021     }
01022     m_master->getBlock(blockNum).extendSelf(nExt, cExt);
01023     m_slave->getBlock(blockNum).extendSelf(nExt, cExt);
01024 }
01025 
01026 CRef<CSeq_align> BlockModelPair::toSeqAlign() const
01027 {
01028     return m_slave->toSeqAlign(*m_master);
01029 }
01030 
01031 
01032 int BlockModelPair::mapToMaster(int slavePos) const
01033 {
01034     int bn = m_slave->getBlockNumber(slavePos);
01035     if (bn < 0)
01036         return -1;
01037     return m_master->getBlock(bn).getStart() + (slavePos - m_slave->getBlock(bn).getStart());
01038 }
01039 
01040 int BlockModelPair::mapToSlave(int masterPos) const
01041 {
01042     int bn = m_master->getBlockNumber(masterPos);
01043     if (bn < 0)
01044         return -1;
01045     return m_slave->getBlock(bn).getStart() + (masterPos - m_master->getBlock(bn).getStart());
01046 }
01047 
01048 bool BlockModelPair::isValid()const
01049 {
01050     return m_master->blockMatch(*m_slave);
01051 }
01052 
01053 //assume this.master is the same as guide.master
01054 //change this.master to guide.slave
01055 //
01056 int BlockModelPair::remaster(const BlockModelPair& guide)
01057 {
01058     if (!SeqIdsMatch(getMaster().getSeqId(),guide.getMaster().getSeqId()))
01059         return 0;
01060     //convert guide.slave to the intersection of this.master and guide.master
01061     pair<DeltaBlockModel*, bool> deltaGuideToThis = m_master->intersect(guide.getMaster());
01062     pair<BlockModel*, bool> intersectedGuideSlave = guide.getSlave() + *(deltaGuideToThis.first);
01063     //convert this.slave to the intersection of this.master and guide.master
01064     pair<DeltaBlockModel*, bool> deltaThisToGuide = guide.getMaster().intersect(*m_master);
01065     pair<BlockModel*, bool> intersectedThisSlave = (*m_slave) + *(deltaThisToGuide.first);
01066     assert((intersectedGuideSlave.first)->blockMatch(*(intersectedThisSlave.first)));
01067     delete m_master;
01068     delete m_slave;
01069     m_master = intersectedGuideSlave.first;
01070     m_slave = intersectedThisSlave.first;
01071     return m_master->getTotalBlockLength();
01072 }
01073 
01074 bool BlockModelPair::mask(const vector<Block>& maskBlocks, bool maskBasedOnMaster)
01075 {
01076     if (!m_master || !m_slave || !isValid()) return false;
01077 
01078     bool hasEffect = false;
01079     BlockModel& maskedBm = (maskBasedOnMaster) ? getMaster() : getSlave();
01080     vector<Block>& newBlocks = maskedBm.getBlocks();
01081     unsigned int i, nBlocks = newBlocks.size();
01082     int pos, mappedPos;
01083 
01084     //  First, mask the primary BlockModel in the pair.
01085     if (maskBlocks.size() > 0 && nBlocks > 0 && maskedBm.mask(maskBlocks)) {
01086 
01087         hasEffect = true;
01088 
01089         //  Then, fix up the other BlockModel in the pair to match the new masked primary BlockModel.
01090         vector<Block>& newOtherBlocks = (maskBasedOnMaster) ? getSlave().getBlocks() : getMaster().getBlocks(); 
01091         newOtherBlocks.clear();
01092         for (i = 0; i < nBlocks; ++i) {
01093             Block& b = newBlocks[i];
01094             pos = b.getStart();
01095             mappedPos = (maskBasedOnMaster) ? mapToSlave(pos) : mapToMaster(pos);
01096             if (mappedPos >= 0) {  // should never fail if bmp.isValid is true.
01097                 newOtherBlocks.push_back(Block(mappedPos, b.getLen(), b.getId()));
01098             }
01099         }
01100     }
01101     return hasEffect;
01102 }
01103 
01104 //reverse the master vs slave
01105 void BlockModelPair::reverse()
01106 {
01107     BlockModel* bm = m_master;
01108     m_master = m_slave;
01109     m_slave = bm;
01110 }
01111 
01112 END_SCOPE(cd_utils)
01113 END_NCBI_SCOPE
Modified on Sat Feb 28 10:56:44 2015 by modify_doxy.py rev. 426318