src/objmgr/seq_map.cpp

Go to the documentation of this file.
00001 /*  $Id: seq_map.cpp 163658 2009-06-17 18:02:59Z vasilche $
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================
00025 *
00026 * Authors: Aleksey Grichenko, Michael Kimelman, Eugene Vasilchenko,
00027 *          Andrei Gourianov
00028 *
00029 * File Description:
00030 *   Sequence map for the Object Manager. Describes sequence as a set of
00031 *   segments of different types (data, reference, gap or end).
00032 *
00033 */
00034 
00035 #include <ncbi_pch.hpp>
00036 #include <objmgr/seq_map.hpp>
00037 #include <objmgr/seq_map_ci.hpp>
00038 #include <objmgr/scope.hpp>
00039 #include <objmgr/bioseq_handle.hpp>
00040 #include <objmgr/impl/bioseq_info.hpp>
00041 #include <objmgr/impl/tse_chunk_info.hpp>
00042 #include <objmgr/impl/tse_split_info.hpp>
00043 #include <objmgr/impl/data_source.hpp>
00044 
00045 #include <objects/seq/Bioseq.hpp>
00046 #include <objects/seq/Seq_data.hpp>
00047 #include <objects/seq/Seq_inst.hpp>
00048 #include <objects/seq/Delta_ext.hpp>
00049 #include <objects/seq/Delta_seq.hpp>
00050 #include <objects/seq/Seq_literal.hpp>
00051 #include <objects/seq/Seq_ext.hpp>
00052 #include <objects/seq/Seg_ext.hpp>
00053 #include <objects/seq/Ref_ext.hpp>
00054 
00055 #include <objects/seqloc/Seq_loc.hpp>
00056 #include <objects/seqloc/Seq_point.hpp>
00057 #include <objects/seqloc/Seq_loc_mix.hpp>
00058 #include <objects/seqloc/Seq_loc_equiv.hpp>
00059 #include <objects/seqloc/Seq_interval.hpp>
00060 #include <objects/seqloc/Packed_seqint.hpp>
00061 #include <objects/seqloc/Packed_seqpnt.hpp>
00062 
00063 #include <algorithm>
00064 
00065 BEGIN_NCBI_SCOPE
00066 BEGIN_SCOPE(objects)
00067 
00068 ////////////////////////////////////////////////////////////////////
00069 //  CSeqMap::CSegment
00070 
00071 inline
00072 CSeqMap::CSegment::CSegment(ESegmentType seg_type,
00073                             TSeqPos      length,
00074                             bool         unknown_len)
00075     : m_Position(kInvalidSeqPos),
00076       m_Length(length),
00077       m_UnknownLength(unknown_len),
00078       m_SegType(seg_type),
00079       m_ObjType(seg_type),
00080       m_RefMinusStrand(false),
00081       m_RefPosition(0)
00082 {
00083 }
00084 
00085 ////////////////////////////////////////////////////////////////////
00086 //  CSeqMap
00087 
00088 
00089 CSeqMap::CSeqMap(void)
00090     : m_Bioseq(0),
00091       m_Resolved(0),
00092       m_Mol(CSeq_inst::eMol_not_set),
00093       m_HasSegments(0),
00094       m_Changed(false),
00095       m_SeqLength(kInvalidSeqPos)
00096 {
00097 }
00098 
00099 
00100 CSeqMap::CSeqMap(CSeqMap* /*parent*/, size_t /*index*/)
00101     : m_Bioseq(0),
00102       m_Resolved(0),
00103       m_Mol(CSeq_inst::eMol_not_set),
00104       m_HasSegments(0),
00105       m_Changed(false),
00106       m_SeqLength(kInvalidSeqPos)
00107 {
00108 }
00109 
00110 
00111 CSeqMap::CSeqMap(const CSeq_loc& ref)
00112     : m_Bioseq(0),
00113       m_Resolved(0),
00114       m_Mol(CSeq_inst::eMol_not_set),
00115       m_HasSegments(0),
00116       m_Changed(false),
00117       m_SeqLength(kInvalidSeqPos)
00118 {
00119     x_AddEnd();
00120     x_Add(ref);
00121     x_AddEnd();
00122 }
00123 
00124 /*
00125 CSeqMap::CSeqMap(const CSeq_data& data, TSeqPos length)
00126     : m_Resolved(0),
00127       m_Mol(CSeq_inst::eMol_not_set),
00128       m_HasSegments(0),
00129       m_Changed(false),
00130       m_SeqLength(kInvalidSeqPos)
00131 {
00132     x_AddEnd();
00133     x_Add(data, length);
00134     x_AddEnd();
00135 }
00136 */
00137 
00138 CSeqMap::CSeqMap(TSeqPos length)
00139     : m_Bioseq(0),
00140       m_Resolved(0),
00141       m_Mol(CSeq_inst::eMol_not_set),
00142       m_HasSegments(0),
00143       m_Changed(false),
00144       m_SeqLength(length)
00145 {
00146     x_AddEnd();
00147     x_AddGap(length, false);
00148     x_AddEnd();
00149 }
00150 
00151 
00152 CSeqMap::CSeqMap(const CSeqMap& sm)
00153     : m_Bioseq(0),
00154       m_Segments(sm.m_Segments),
00155       m_Resolved(sm.m_Resolved),
00156       m_Delta(sm.m_Delta),
00157       m_Mol(sm.m_Mol),
00158       m_HasSegments(sm.m_HasSegments),
00159       m_Changed(sm.m_Changed),
00160       m_SeqLength(sm.m_SeqLength)
00161 {
00162     NON_CONST_ITERATE ( TSegments, it, m_Segments ) {
00163         if ( it->m_ObjType == eSeqChunk ) {
00164             it->m_SegType = eSeqGap;
00165             it->m_ObjType = eSeqGap;
00166             it->m_RefObject = null;
00167         }
00168     }
00169 }
00170 
00171 
00172 CSeqMap::CSeqMap(const CSeq_inst& inst)
00173     : m_Bioseq(0),
00174       m_Resolved(0),
00175       m_Mol(CSeq_inst::eMol_not_set),
00176       m_HasSegments(0),
00177       m_Changed(false),
00178       m_SeqLength(kInvalidSeqPos)
00179 {
00180     x_AddEnd();
00181 
00182     if ( inst.IsSetMol() ) {
00183         m_Mol = inst.GetMol();
00184     }
00185     if ( inst.IsSetLength() ) {
00186         m_SeqLength = inst.GetLength();
00187     }
00188 
00189     if ( inst.IsSetSeq_data() ) {
00190         if ( !inst.GetSeq_data().IsGap() ) {
00191             x_Add(inst.GetSeq_data(), inst.GetLength());
00192         }
00193         else {
00194             // split Seq-data
00195             x_AddGap(inst.GetLength(), false, inst.GetSeq_data());
00196         }
00197     }
00198     else if ( inst.IsSetExt() ) {
00199         const CSeq_ext& ext = inst.GetExt();
00200         switch (ext.Which()) {
00201         case CSeq_ext::e_Seg:
00202             ITERATE ( CSeq_ext::TSeg::Tdata, iter, ext.GetSeg().Get() ) {
00203                 x_Add(**iter);
00204             }
00205             break;
00206         case CSeq_ext::e_Ref:
00207             x_Add(ext.GetRef());
00208             break;
00209         case CSeq_ext::e_Delta:
00210             ITERATE ( CSeq_ext::TDelta::Tdata, iter, ext.GetDelta().Get() ) {
00211                 x_Add(**iter);
00212             }
00213             break;
00214         case CSeq_ext::e_Map:
00215             //### Not implemented
00216             NCBI_THROW(CSeqMapException, eUnimplemented,
00217                        "CSeq_ext::e_Map -- not implemented");
00218         default:
00219             //### Not implemented
00220             NCBI_THROW(CSeqMapException, eUnimplemented,
00221                        "CSeq_ext::??? -- not implemented");
00222         }
00223     }
00224     else if ( inst.GetRepr() == CSeq_inst::eRepr_virtual ) {
00225         // Virtual sequence -- no data, no segments
00226         // The total sequence is gap
00227         if ( m_SeqLength == kInvalidSeqPos ) {
00228             m_SeqLength = 0;
00229         }
00230         x_AddGap(m_SeqLength, false);
00231     }
00232     else if ( inst.GetRepr() != CSeq_inst::eRepr_not_set && 
00233               inst.IsSetLength() && inst.GetLength() != 0 ) {
00234         // split seq-data
00235         x_AddGap(inst.GetLength(), false);
00236     }
00237     else {
00238         if ( inst.GetRepr() != CSeq_inst::eRepr_not_set ) {
00239             NCBI_THROW(CSeqMapException, eDataError,
00240                        "CSeq_inst.repr of sequence without data "
00241                        "should be not_set");
00242         }
00243         if ( inst.IsSetLength() && inst.GetLength() != 0 ) {
00244             NCBI_THROW(CSeqMapException, eDataError,
00245                        "CSeq_inst.length of sequence without data "
00246                        "should be 0");
00247         }
00248         x_AddGap(0, false);
00249     }
00250 
00251     x_AddEnd();
00252 }
00253 
00254 
00255 CSeqMap::~CSeqMap(void)
00256 {
00257     _ASSERT(!m_Bioseq);
00258     m_Resolved = 0;
00259     m_Segments.clear();
00260 }
00261 
00262 
00263 void CSeqMap::x_GetSegmentException(size_t /*index*/) const
00264 {
00265     NCBI_THROW(CSeqMapException, eInvalidIndex,
00266                "Invalid segment index");
00267 }
00268 
00269 
00270 CSeqMap::CSegment& CSeqMap::x_SetSegment(size_t index)
00271 {
00272     _ASSERT(index < m_Segments.size());
00273     return m_Segments[index];
00274 }
00275 
00276 
00277 CBioseq_Handle CSeqMap::x_GetBioseqHandle(const CSegment& seg,
00278                                           CScope* scope) const
00279 {
00280     const CSeq_id& seq_id = x_GetRefSeqid(seg);
00281     if ( !scope ) {
00282         NCBI_THROW(CSeqMapException, eNullPointer,
00283                    "Cannot resolve "+
00284                    seq_id.AsFastaString()+": null scope pointer");
00285     }
00286     CBioseq_Handle bh = scope->GetBioseqHandle(seq_id);
00287     if ( !bh ) {
00288         bh = scope->GetBioseqHandle(seq_id);
00289         NCBI_THROW(CSeqMapException, eFail,
00290                    "Cannot resolve "+
00291                    seq_id.AsFastaString()+": unknown");
00292     }
00293     return bh;
00294 }
00295 
00296 
00297 TSeqPos CSeqMap::x_ResolveSegmentLength(size_t index, CScope* scope) const
00298 {
00299     const CSegment& seg = x_GetSegment(index);
00300     TSeqPos length = seg.m_Length;
00301     if ( length == kInvalidSeqPos ) {
00302         if ( seg.m_SegType == eSeqSubMap ) {
00303             length = x_GetSubSeqMap(seg, scope)->GetLength(scope);
00304         }
00305         else if ( seg.m_SegType == eSeqRef ) {
00306             length = x_GetBioseqHandle(seg, scope).GetBioseqLength();
00307         }
00308         if (length == kInvalidSeqPos) {
00309             NCBI_THROW(CSeqMapException, eDataError,
00310                     "Invalid sequence length");
00311         }
00312         seg.m_Length = length;
00313     }
00314     return length;
00315 }
00316 
00317 
// Return the start position of the segment at 'index', computing and
// storing the positions of all segments between the last resolved one
// and 'index' on the way.
//
// index - segment index; x_GetSegmentException() is called if it is
//         greater than x_GetLastEndSegmentIndex()
// scope - passed to x_GetSegmentLength() for each segment walked
//
// Throws CSeqMapException(eDataError) if the accumulated position wraps
// around or becomes equal to kInvalidSeqPos.
00318 TSeqPos CSeqMap::x_ResolveSegmentPosition(size_t index, CScope* scope) const
00319 {
00320     if ( index > x_GetLastEndSegmentIndex() ) {
00321         x_GetSegmentException(index);
00322     }
    // Work on a local snapshot of the resolved index
00323     size_t resolved = m_Resolved;
    // Already resolved: the stored position can be returned directly
00324     if ( index <= resolved )
00325         return x_GetSegment(index).m_Position;
00326     TSeqPos resolved_pos = x_GetSegment(resolved).m_Position;
    // Each pass adds the length of segment 'resolved' and stores the sum
    // as the position of the next segment
00327     do {
00328         TSeqPos seg_pos = resolved_pos;
00329         resolved_pos += x_GetSegmentLength(resolved, scope);
        // A sum smaller than the previous position means wrap-around
00330         if (resolved_pos < seg_pos  ||  resolved_pos == kInvalidSeqPos) {
00331             NCBI_THROW(CSeqMapException, eDataError,
00332                     "Sequence position overflow");
00333         }
        // NOTE(review): this store happens before the mutex below is taken
00334         m_Segments[++resolved].m_Position = resolved_pos;
00335     } while ( resolved < index );
    // Publish the new resolved index under the mutex, never lowering it
00336     {{
00337         CMutexGuard guard(m_SeqMap_Mtx);
00338         if ( m_Resolved < resolved )
00339             m_Resolved = resolved;
00340     }}
00341     return resolved_pos;
00342 }
00343 
00344 
// Find the index of the segment for sequence position 'pos'.
//
// pos   - sequence position to look up
// scope - passed to x_GetSegmentLength() when more segments have to be
//         resolved
//
// Returns the segment index, or size_t(-1) when the walk reaches the
// last end segment or the search among resolved segments finds nothing.
// Throws CSeqMapException(eDataError) if the accumulated position wraps
// around or becomes equal to kInvalidSeqPos.
00345 size_t CSeqMap::x_FindSegment(TSeqPos pos, CScope* scope) const
00346 {
    // Work on a local snapshot of the resolved index
00347     size_t resolved = m_Resolved;
00348     TSeqPos resolved_pos = x_GetSegment(resolved).m_Position;
00349     if ( resolved_pos <= pos ) {
        // 'pos' is at or after the last resolved segment start:
        // resolve further segments until one starts after 'pos'
00350         do {
00351             if ( resolved >= x_GetLastEndSegmentIndex() ) {
00352                 // end of segments
                // NOTE(review): m_Resolved is written here without taking
                // m_SeqMap_Mtx, unlike the update after the loop
00353                 m_Resolved = resolved;
00354                 return size_t(-1);
00355             }
00356             TSeqPos seg_pos = resolved_pos;
00357             resolved_pos += x_GetSegmentLength(resolved, scope);
            // A sum smaller than the previous position means wrap-around
00358             if (resolved_pos < seg_pos  ||  resolved_pos == kInvalidSeqPos) {
00359                 NCBI_THROW(CSeqMapException, eDataError,
00360                         "Sequence position overflow");
00361             }
00362             m_Segments[++resolved].m_Position = resolved_pos;
00363         } while ( resolved_pos <= pos );
        // Publish the new resolved index under the mutex, never lowering it
00364         {{
00365             CMutexGuard guard(m_SeqMap_Mtx);
00366             if ( m_Resolved < resolved )
00367                 m_Resolved = resolved;
00368         }}
        // Segment 'resolved' starts after 'pos'; the previous one is returned
00369         return resolved - 1;
00370     }
00371     else {
        // 'pos' is before the last resolved segment start: binary search
        // among the segments preceding it.
        // NOTE(review): the exact ordering is defined by SPosLessSegment,
        // which is declared outside this file section -- confirm there
00372         TSegments::const_iterator end = m_Segments.begin()+resolved;
00373         TSegments::const_iterator it = 
00374             upper_bound(m_Segments.begin(), end,
00375                         pos, SPosLessSegment());
00376         if ( it == end ) {
00377             return size_t(-1);
00378         }
00379         return it - m_Segments.begin();
00380     }
00381 }
00382 
00383 
00384 void CSeqMap::x_LoadObject(const CSegment& seg) const
00385 {
00386     _ASSERT(seg.m_Position != kInvalidSeqPos);
00387     if ( !seg.m_RefObject || seg.m_SegType != seg.m_ObjType ) {
00388         const CObject* obj = seg.m_RefObject.GetPointer();
00389         if ( obj && seg.m_ObjType == eSeqChunk ) {
00390             const CTSE_Chunk_Info* chunk =
00391                 dynamic_cast<const CTSE_Chunk_Info*>(obj);
00392             if ( chunk ) {
00393                 chunk->Load();
00394             }
00395         }
00396     }
00397 }
00398 
00399 
00400 CRef<CTSE_Chunk_Info> CSeqMap::x_GetChunkToLoad(const CSegment& seg) const
00401 {
00402     _ASSERT(seg.m_Position != kInvalidSeqPos);
00403     if ( !seg.m_RefObject || seg.m_SegType != seg.m_ObjType ) {
00404         const CObject* obj = seg.m_RefObject.GetPointer();
00405         if ( obj && seg.m_ObjType == eSeqChunk ) {
00406             const CTSE_Chunk_Info* chunk =
00407                 dynamic_cast<const CTSE_Chunk_Info*>(obj);
00408             if ( chunk->NotLoaded() ) {
00409                 return Ref(const_cast<CTSE_Chunk_Info*>(chunk));
00410             }
00411         }
00412     }
00413     return null;
00414 }
00415 
00416 
00417 const CObject* CSeqMap::x_GetObject(const CSegment& seg) const
00418 {
00419     if ( !seg.m_RefObject || seg.m_SegType != seg.m_ObjType ) {
00420         x_LoadObject(seg);
00421     }
00422     if ( !seg.m_RefObject || seg.m_SegType != seg.m_ObjType ) {
00423         NCBI_THROW(CSeqMapException, eNullPointer, "null object pointer");
00424     }
00425     return seg.m_RefObject.GetPointer();
00426 }
00427 
00428 
00429 void CSeqMap::x_SetObject(CSegment& seg, const CObject& obj)
00430 {
00431     // lock for object modification
00432     CMutexGuard guard(m_SeqMap_Mtx);
00433     // check for object
00434     if ( seg.m_RefObject && seg.m_SegType == seg.m_ObjType ) {
00435         NCBI_THROW(CSeqMapException, eDataError, "object already set");
00436     }
00437     // set object
00438     seg.m_ObjType = seg.m_SegType;
00439     seg.m_RefObject.Reset(&obj);
00440     m_Changed = true;
00441 }
00442 
00443 
00444 void CSeqMap::x_SetChunk(CSegment& seg, CTSE_Chunk_Info& chunk)
00445 {
00446     // lock for object modification
00447     //CMutexGuard guard(m_SeqMap_Mtx);
00448     // check for object
00449     if ( seg.m_ObjType == eSeqChunk ||
00450          seg.m_RefObject && seg.m_SegType == seg.m_ObjType ) {
00451         NCBI_THROW(CSeqMapException, eDataError, "object already set");
00452     }
00453     // set object
00454     seg.m_RefObject.Reset(&chunk);
00455     seg.m_ObjType = eSeqChunk;
00456 }
00457 
00458 
00459 CConstRef<CSeqMap> CSeqMap::x_GetSubSeqMap(const CSegment& seg, CScope* scope,
00460                                            bool resolveExternal) const
00461 {
00462     CConstRef<CSeqMap> ret;
00463     if ( seg.m_SegType == eSeqSubMap ) {
00464         ret.Reset(static_cast<const CSeqMap*>(x_GetObject(seg)));
00465     }
00466     else if ( resolveExternal && seg.m_SegType == eSeqRef ) {
00467         ret.Reset(&x_GetBioseqHandle(seg, scope).GetSeqMap());
00468     }
00469     return ret;
00470 }
00471 
00472 
00473 void CSeqMap::x_SetSubSeqMap(size_t /*index*/, CSeqMap_Delta_seqs* /*subMap*/)
00474 {
00475     // not valid in generic seq map -> incompatible objects
00476     NCBI_THROW(CSeqMapException, eDataError, "Invalid parent map");
00477 }
00478 
00479 
00480 const CSeq_data& CSeqMap::x_GetSeq_data(const CSegment& seg) const
00481 {
00482     if ( seg.m_SegType == eSeqData ) {
00483         return *static_cast<const CSeq_data*>(x_GetObject(seg));
00484     }
00485     else if ( seg.m_SegType == eSeqGap && seg.m_ObjType == eSeqData ) {
00486         return *static_cast<const CSeq_data*>(seg.m_RefObject.GetPointer());
00487     }
00488     NCBI_THROW(CSeqMapException, eSegmentTypeError,
00489                "Invalid segment type");
00490 }
00491 
00492 
00493 void CSeqMap::x_SetSeq_data(size_t index, CSeq_data& data)
00494 {
00495     // check segment type
00496     CSegment& seg = x_SetSegment(index);
00497     if ( seg.m_SegType != eSeqData ) {
00498         NCBI_THROW(CSeqMapException, eSegmentTypeError,
00499                    "Invalid segment type");
00500     }
00501     if ( data.IsGap() ) {
00502         ERR_POST("CSeqMap: gap Seq-data was split as real data");
00503         seg.m_SegType = eSeqGap;
00504     }
00505     x_SetObject(seg, data);
00506 }
00507 
00508 
00509 void CSeqMap::x_SetChanged(size_t index)
00510 {
00511     while ( m_Resolved > index ) {
00512         x_SetSegment(m_Resolved--).m_Position = kInvalidSeqPos;
00513     }
00514     m_SeqLength = kInvalidSeqPos;
00515     m_HasSegments = 0;
00516     if ( !m_Changed ) {
00517         m_Changed = true;
00518         if ( m_Bioseq ) {
00519             m_Bioseq->x_SetChangedSeqMap();
00520         }
00521     }
00522 }
00523 
00524 
00525 void CSeqMap::x_StartEditing(void)
00526 {
00527     if ( !m_Bioseq ) {
00528         NCBI_THROW(CSeqMapException, eSegmentTypeError,
00529                    "Cannot edit unattached sequence map");
00530     }
00531     if ( !m_Bioseq->GetDataSource().CanBeEdited() ) {
00532         NCBI_THROW(CSeqMapException, eSegmentTypeError,
00533                    "Bioseq is not in edit state");
00534     }
00535 }
00536 
00537 
00538 void CSeqMap::x_SetSegmentGap(size_t index,
00539                               TSeqPos length,
00540                               CSeq_data* gap_data)
00541 {
00542     if ( gap_data && !gap_data->IsGap() ) {
00543         NCBI_THROW(CSeqMapException, eSegmentTypeError,
00544                    "SetSegmentGap: Seq-data is not gap");
00545     }
00546     CMutexGuard guard(m_SeqMap_Mtx);
00547     x_StartEditing();
00548     CSegment& seg = x_SetSegment(index);
00549     seg.m_SegType = seg.m_ObjType = eSeqGap;
00550     if ( gap_data ) {
00551         seg.m_ObjType = eSeqData;
00552         seg.m_RefObject = gap_data;
00553     }
00554     seg.m_Length = length;
00555     x_SetChanged(index);
00556 }
00557 
00558 
00559 void CSeqMap::x_SetSegmentData(size_t index,
00560                                TSeqPos length,
00561                                CSeq_data& data)
00562 {
00563     CMutexGuard guard(m_SeqMap_Mtx);
00564     x_StartEditing();
00565     CSegment& seg = x_SetSegment(index);
00566     seg.m_SegType = data.IsGap()? eSeqGap: eSeqData;
00567     seg.m_ObjType = eSeqData;
00568     seg.m_RefObject = &data;
00569     seg.m_Length = length;
00570     x_SetChanged(index);
00571 }
00572 
00573 
00574 void CSeqMap::x_SetSegmentRef(size_t index,
00575                               TSeqPos length,
00576                               const CSeq_id& ref_id,
00577                               TSeqPos ref_pos,
00578                               bool ref_minus_strand)
00579 {
00580     CMutexGuard guard(m_SeqMap_Mtx);
00581     x_StartEditing();
00582     CSegment& seg = x_SetSegment(index);
00583     seg.m_SegType = seg.m_ObjType = eSeqRef;
00584     CRef<CSeq_id> id(new CSeq_id);
00585     id->Assign(ref_id);
00586     seg.m_RefObject = id.GetPointer();
00587     seg.m_RefPosition = ref_pos;
00588     seg.m_RefMinusStrand = ref_minus_strand;
00589     seg.m_Length = length;
00590     x_SetChanged(index);
00591 }
00592 
00593 
00594 void CSeqMap::SetSegmentRef(const CSeqMap_CI& seg,
00595                             TSeqPos length,
00596                             const CSeq_id_Handle& ref_id,
00597                             TSeqPos ref_pos,
00598                             bool ref_minus)
00599 {
00600     _ASSERT(&seg.x_GetSegmentInfo().x_GetSeqMap() == this);
00601     size_t index = seg.x_GetSegmentInfo().x_GetIndex();
00602     x_SetSegmentRef(index, length, *ref_id.GetSeqId(), ref_pos, ref_minus);
00603 }
00604 
00605 
00606 void CSeqMap::SetSegmentGap(const CSeqMap_CI& seg,
00607                             TSeqPos length)
00608 {
00609     _ASSERT(&seg.x_GetSegmentInfo().x_GetSeqMap() == this);
00610     size_t index = seg.x_GetSegmentInfo().x_GetIndex();
00611     x_SetSegmentGap(index, length);
00612 }
00613 
00614 
00615 void CSeqMap::SetSegmentGap(const CSeqMap_CI& seg,
00616                             TSeqPos length,
00617                             CSeq_data& gap_data)
00618 {
00619     _ASSERT(&seg.x_GetSegmentInfo().x_GetSeqMap() == this);
00620     size_t index = seg.x_GetSegmentInfo().x_GetIndex();
00621     x_SetSegmentGap(index, length, &gap_data);
00622 }
00623 
00624 
00625 void CSeqMap::SetSegmentData(const CSeqMap_CI& seg,
00626                              TSeqPos length,
00627                              CSeq_data& data)
00628 {
00629     _ASSERT(&seg.x_GetSegmentInfo().x_GetSeqMap() == this);
00630     size_t index = seg.x_GetSegmentInfo().x_GetIndex();
00631     x_SetSegmentData(index, length, data);
00632 }
00633 
00634 
00635 CSeqMap_CI CSeqMap::InsertSegmentGap(const CSeqMap_CI& seg0,
00636                                      TSeqPos length)
00637 {
00638     _ASSERT(&seg0.x_GetSegmentInfo().x_GetSeqMap() == this);
00639     size_t index = seg0.x_GetSegmentInfo().x_GetIndex();
00640     TSeqPos pos = x_GetSegmentPosition(index, 0);
00641     CMutexGuard guard(m_SeqMap_Mtx);
00642     x_StartEditing();
00643     _ASSERT(m_Resolved >= index);
00644     m_Segments.insert(m_Segments.begin() + index, CSegment(eSeqGap, length));
00645     ++m_Resolved;
00646     x_SetSegment(index).m_Position = pos;
00647     x_SetChanged(index);
00648     return CSeqMap_CI(seg0, *this, index, pos);
00649 }
00650 
00651 
00652 CSeqMap_CI CSeqMap::RemoveSegment(const CSeqMap_CI& seg0)
00653 {
00654     _ASSERT(&seg0.x_GetSegmentInfo().x_GetSeqMap() == this);
00655     size_t index = seg0.x_GetSegmentInfo().x_GetIndex();
00656     TSeqPos pos = x_GetSegmentPosition(index, 0);
00657     CMutexGuard guard(m_SeqMap_Mtx);
00658     x_StartEditing();
00659     CSegment& seg = x_SetSegment(index);
00660     if ( seg.m_SegType == eSeqEnd ) {
00661         NCBI_THROW(CSeqMapException, eSegmentTypeError,
00662                    "cannot remove end segment");
00663     }
00664     _ASSERT(m_Resolved >= index);
00665     m_Segments.erase(m_Segments.begin() + index);
00666     if ( m_Resolved > index ) {
00667         --m_Resolved;
00668     }
00669     x_SetSegment(index).m_Position = pos;
00670     x_SetChanged(index);
00671     _ASSERT(m_Resolved == index);
00672     return CSeqMap_CI(seg0, *this, index, pos);
00673 }
00674 
00675 
00676 void CSeqMap::LoadSeq_data(TSeqPos pos, TSeqPos len,
00677                            const CSeq_data& data)
00678 {
00679     size_t index = x_FindSegment(pos, 0);
00680     const CSegment& seg = x_GetSegment(index);
00681     if ( seg.m_Position != pos || seg.m_Length != len ) {
00682         NCBI_THROW(CSeqMapException, eDataError,
00683                    "Invalid segment size");
00684     }
00685     x_SetSeq_data(index, const_cast<CSeq_data&>(data));
00686 }
00687 
00688 
00689 const CSeq_id& CSeqMap::x_GetRefSeqid(const CSegment& seg) const
00690 {
00691     if ( seg.m_SegType == eSeqRef ) {
00692         return static_cast<const CSeq_id&>(*x_GetObject(seg));
00693     }
00694     NCBI_THROW(CSeqMapException, eSegmentTypeError,
00695                "Invalid segment type");
00696 }
00697 
00698 
00699 TSeqPos CSeqMap::x_GetRefPosition(const CSegment& seg) const
00700 {
00701     return seg.m_RefPosition;
00702 }
00703 
00704 
00705 bool CSeqMap::x_GetRefMinusStrand(const CSegment& seg) const
00706 {
00707     return seg.m_RefMinusStrand;
00708 }
00709 
00710 
00711 CSeqMap_CI CSeqMap::Begin(CScope* scope) const
00712 {
00713     return CSeqMap_CI(CConstRef<CSeqMap>(this), scope, SSeqMapSelector());
00714 }
00715 
00716 
00717 CSeqMap_CI CSeqMap::End(CScope* scope) const
00718 {
00719     return CSeqMap_CI(CConstRef<CSeqMap>(this), scope, SSeqMapSelector(),
00720                       kMax_UInt);
00721 }
00722 
00723 
00724 CSeqMap_CI CSeqMap::FindSegment(TSeqPos pos, CScope* scope) const
00725 {
00726     return CSeqMap_CI(CConstRef<CSeqMap>(this), scope, SSeqMapSelector(), pos);
00727 }
00728 
00729 
00730 CSeqMap::const_iterator CSeqMap::begin(CScope* scope) const
00731 {
00732     return Begin(scope);
00733 }
00734 
00735 
00736 CSeqMap::const_iterator CSeqMap::end(CScope* scope) const
00737 {
00738     return End(scope);
00739 }
00740 
00741 
00742 CSeqMap_CI CSeqMap::BeginResolved(CScope*                scope,
00743                                   const SSeqMapSelector& sel) const
00744 {
00745     return CSeqMap_CI(CConstRef<CSeqMap>(this), scope, sel);
00746 }
00747 
00748 
00749 CSeqMap_CI CSeqMap::BeginResolved(CScope* scope) const
00750 {
00751     SSeqMapSelector sel;
00752     sel.SetResolveCount(kMax_UInt);
00753     return CSeqMap_CI(CConstRef<CSeqMap>(this), scope, sel);
00754 }
00755 
00756 
00757 CSeqMap_CI CSeqMap::EndResolved(CScope* scope) const
00758 {
00759     SSeqMapSelector sel;
00760     sel.SetResolveCount(kMax_UInt);
00761     return CSeqMap_CI(CConstRef<CSeqMap>(this), scope, sel, kMax_UInt);
00762 }
00763 
00764 
00765 CSeqMap_CI CSeqMap::EndResolved(CScope*                scope,
00766                                 const SSeqMapSelector& sel) const
00767 {
00768     return CSeqMap_CI(CConstRef<CSeqMap>(this), scope, sel, kMax_UInt);
00769 }
00770 
00771 
00772 CSeqMap_CI CSeqMap::FindResolved(CScope*                scope,
00773                                  TSeqPos                pos,
00774                                  const SSeqMapSelector& selector) const
00775 {
00776     return CSeqMap_CI(CConstRef<CSeqMap>(this), scope, selector, pos);
00777 }
00778 
00779 
00780 CSeqMap_CI CSeqMap::ResolvedRangeIterator(CScope* scope,
00781                                           TSeqPos from,
00782                                           TSeqPos length,
00783                                           ENa_strand strand,
00784                                           size_t maxResolveCount,
00785                                           TFlags flags) const
00786 {
00787     SSeqMapSelector sel;
00788     sel.SetFlags(flags).SetResolveCount(maxResolveCount);
00789     sel.SetRange(from, length).SetStrand(strand);
00790     return CSeqMap_CI(CConstRef<CSeqMap>(this), scope, sel);
00791 }
00792 
00793 
00794 bool CSeqMap::HasSegmentOfType(ESegmentType type) const
00795 {
00796     if ( m_HasSegments == 0 ) {
00797         THasSegments flags = 0;
00798         ITERATE ( TSegments, it, m_Segments ) {
00799             flags |= 1<<it->m_SegType;
00800         }
00801         m_HasSegments = flags;
00802     }
00803     return bool((m_HasSegments >> type) & 1);
00804 }
00805 
00806 
00807 size_t CSeqMap::CountSegmentsOfType(ESegmentType type) const
00808 {
00809     size_t count = 0;
00810     ITERATE ( TSegments, it, m_Segments ) {
00811         if ( it->m_SegType == type ) {
00812             ++count;
00813         }
00814     }
00815     return count;
00816 }
00817 
00818 
00819 bool CSeqMap::CanResolveRange(CScope* scope,
00820                               TSeqPos from,
00821                               TSeqPos length,
00822                               ENa_strand strand,
00823                               size_t depth,
00824                               TFlags flags) const
00825 {
00826     SSeqMapSelector sel;
00827     sel.SetFlags(flags).SetResolveCount(depth);
00828     sel.SetRange(from, length).SetStrand(strand);
00829     return CanResolveRange(scope, sel);
00830 }
00831 
00832 
00833 namespace {
00834     struct PByLoader {
00835         static CDataLoader* Get(const CRef<CTSE_Chunk_Info>& c) {
00836             return &c->GetSplitInfo().GetDataLoader();
00837         }
00838         bool operator()(const CRef<CTSE_Chunk_Info>& c1,
00839                         const CRef<CTSE_Chunk_Info>& c2) const {
00840             const CTSE_Split_Info* s1 = &c1->GetSplitInfo();
00841             const CTSE_Split_Info* s2 = &c2->GetSplitInfo();
00842             CDataLoader* l1 = &s1->GetDataLoader();
00843             CDataLoader* l2 = &s2->GetDataLoader();
00844             if ( l1 != l2 ) {
00845                 return l1 < l2;
00846             }
00847             if ( s1 != s2 ) {
00848                 return s1 < s2;
00849             }
00850             return c1->GetChunkId() < c2->GetChunkId();
00851         }
00852     };
00853 }
00854 
00855 
00856 bool CSeqMap::CanResolveRange(CScope* scope, const SSeqMapSelector& sel) const
00857 {
00858     try {
00859         TSeqPos length = kInvalidSeqPos;
00860         if ( scope ) {
00861             length = GetLength(scope);
00862         }
00863         TSeqPos start = sel.m_Position;
00864         if ( start >= length ) {
00865             return false;
00866         }
00867         TSeqPos stop = length;
00868         if ( sel.m_Length != kInvalidSeqPos ) {
00869             stop = start + sel.m_Length;
00870             if ( stop < start || stop > length ) {
00871                 return false;
00872             }
00873         }
00874         TSeqPos found_length = 0;
00875         
00876         if ( scope && sel.m_LinkUsedTSE && sel.m_TopTSE &&
00877              !sel.x_HasLimitTSE() ) {
00878             // Faster BFS search with batch load requests.
00879             // We will do it only if loaded data will be locked in scope.
00880             // That's why we verify that scope exists, and start TSE is set.
00881             bool deeper = true;
00882             size_t next_depth = 0;
00883             vector<CTSE_Handle> all_tse;
00884             vector<CTSE_Handle> parent_tse;
00885             vector<CSeq_id_Handle> next_ids;
00886             CDataLoader::TChunkSet load_chunks, other_chunks;
00887             SSeqMapSelector next_sel(sel);
00888             while ( deeper ) {
00889                 deeper = false;
00890                 if ( next_depth > sel.m_MaxResolveCount ) {
00891                     break;
00892                 }
00893                 next_sel.SetResolveCount(next_depth);
00894                 next_sel.SetFlags(fFindAnyLeaf);
00895                 parent_tse.clear();
00896                 next_ids.clear();
00897                 load_chunks.clear();
00898                 {{
00899                     CSeqMap_CI it(ConstRef(this), scope, next_sel);
00900                     for(; it; ++it) {
00901                         if ( it.m_Selector.m_MaxResolveCount != 0 ) {
00902                             continue;
00903                         }
00904                         if ( it.GetType() == eSeqRef ) {
00905                             parent_tse.push_back(it.x_GetSegmentInfo().m_TSE);
00906                             _ASSERT(parent_tse.back());
00907                             next_ids.push_back(it.GetRefSeqid());
00908                             _ASSERT(next_ids.back());
00909                         }
00910                         else {
00911                             found_length += it.GetLength();
00912                             CRef<CTSE_Chunk_Info> chunk = it.x_GetSeqMap()
00913                                 .x_GetChunkToLoad(it.x_GetSegment());
00914                             if ( chunk ) {
00915                                 load_chunks.push_back(chunk);
00916                             }
00917                         }
00918                     }
00919                     if ( it.GetPosition() < stop ) {
00920                         return false;
00921                     }
00922                 }}
00923                 if ( !load_chunks.empty() ) {
00924                     sort(load_chunks.begin(), load_chunks.end(), PByLoader());
00925                     load_chunks.erase(unique(load_chunks.begin(),
00926                                              load_chunks.end()),
00927                                       load_chunks.end());
00928                     CDataLoader* first_loader = PByLoader::Get(load_chunks[0]);
00929                     CDataLoader* last_loader;
00930                     while ( (last_loader=PByLoader::Get(load_chunks.back())) !=
00931                             first_loader ){
00932                         other_chunks.clear();
00933                         while ( PByLoader::Get(load_chunks.back()) ==
00934                                 last_loader ) {
00935                             other_chunks.push_back(load_chunks.back());
00936                             load_chunks.pop_back();
00937                         }
00938                         last_loader->GetChunks(other_chunks);
00939                     }
00940                     first_loader->GetChunks(load_chunks);
00941                 }
00942                 if ( !next_ids.empty() ) {
00943                     deeper = true;
00944                     vector<CBioseq_Handle> seqs =
00945                         scope->GetBioseqHandles(next_ids);
00946                     _ASSERT(seqs.size() == parent_tse.size());
00947                     for ( size_t i = 0; i < seqs.size(); ++i ) {
00948                         if ( !seqs[i] ) {
00949                             return false;
00950                         }
00951                         const CTSE_Handle& tse = seqs[i].GetTSE_Handle();
00952                         all_tse.push_back(tse);
00953                         if ( !parent_tse[i].AddUsedTSE(tse) ) {
00954                             sel.AddUsedTSE(tse);
00955                         }
00956                     }
00957                 }
00958                 ++next_depth;
00959             }
00960         }
00961         else {
00962             CSeqMap_CI it(ConstRef(this), scope, sel);
00963             for(; it; ++it) {
00964                 found_length += it.GetLength();
00965             }
00966             if ( it.GetPosition() < stop ) {
00967                 return false;
00968             }
00969         }
00970         if ( stop != kInvalidSeqPos && stop-start != found_length ) {
00971             return false;
00972         }
00973         return true;
00974     }
00975     catch (exception&) {
00976         return false;
00977     }
00978 }
00979 
00980 
00981 CRef<CSeqMap> CSeqMap::CreateSeqMapForBioseq(const CBioseq& seq)
00982 {
00983     return Ref(new CSeqMap(seq.GetInst()));
00984 }
00985 
00986 
00987 CRef<CSeqMap> CSeqMap::CloneFor(const CBioseq& seq) const
00988 {
00989     return CreateSeqMapForBioseq(seq);
00990     /*
00991     CMutexGuard guard(m_SeqMap_Mtx);
00992     CRef<CSeqMap> ret;
00993     const CSeq_inst& inst = seq.GetInst();
00994     if ( inst.IsSetSeq_data() ) {
00995         ret.Reset(new CSeqMap_Seq_data(inst));
00996     }
00997     else if ( inst.IsSetExt() ) {
00998         const CSeq_ext& ext = inst.GetExt();
00999         switch (ext.Which()) {
01000         case CSeq_ext::e_Seg:
01001             ret.Reset(new CSeqMap_Seq_locs(ext.GetSeg(),
01002                                            ext.GetSeg().Get()));
01003             break;
01004         case CSeq_ext::e_Ref:
01005             ret.Reset(new CSeqMap(ext.GetRef()));
01006             break;
01007         case CSeq_ext::e_Delta:
01008             ret.Reset(new CSeqMap_Delta_seqs(ext.GetDelta()));
01009             break;
01010         case CSeq_ext::e_Map:
01011             //### Not implemented
01012             NCBI_THROW(CSeqMapException, eUnimplemented,
01013                        "CSeq_ext::e_Map -- not implemented");
01014         default:
01015             //### Not implemented
01016             NCBI_THROW(CSeqMapException, eUnimplemented,
01017                        "CSeq_ext::??? -- not implemented");
01018         }
01019     }
01020     else if ( inst.GetRepr() == CSeq_inst::eRepr_virtual ) {
01021         // Virtual sequence -- no data, no segments
01022         // The total sequence is gap
01023         ret.Reset(new CSeqMap(inst.GetLength()));
01024     }
01025     else if ( inst.GetRepr() != CSeq_inst::eRepr_not_set && 
01026               inst.IsSetLength() && inst.GetLength() != 0 ) {
01027         // split seq-data
01028         ret.Reset(new CSeqMap_Seq_data(inst));
01029     }
01030     else {
01031         if ( inst.GetRepr() != CSeq_inst::eRepr_not_set ) {
01032             NCBI_THROW(CSeqMapException, eDataError,
01033                        "CSeq_inst.repr of sequence without data "
01034                        "should be not_set");
01035         }
01036         if ( inst.IsSetLength() && inst.GetLength() != 0 ) {
01037             NCBI_THROW(CSeqMapException, eDataError,
01038                        "CSeq_inst.length of sequence without data "
01039                        "should be 0");
01040         }
01041         ret.Reset(new CSeqMap(TSeqPos(0)));
01042     }
01043     ret->m_Mol = inst.GetMol();
01044     if ( inst.IsSetLength() ) {
01045         ret->m_SeqLength = inst.GetLength();
01046     }
01047     return ret;
01048     */
01049 }
01050 
01051 
01052 CRef<CSeqMap> CSeqMap::CreateSeqMapForSeq_loc(const CSeq_loc& loc,
01053                                               CScope* scope)
01054 {
01055     TMol mol = CSeq_inst::eMol_not_set;
01056     CRef<CSeqMap> ret(new CSeqMap(loc));
01057     if ( scope && ret->m_Mol == CSeq_inst::eMol_not_set ) {
01058         if ( mol == CSeq_inst::eMol_not_set ) {
01059             for ( size_t i = 1; ; ++i ) {
01060                 const CSegment& seg = ret->x_GetSegment(i);
01061                 if ( seg.m_SegType == eSeqEnd ) {
01062                     break;
01063                 }
01064                 else if ( seg.m_SegType == eSeqRef ) {
01065                     CBioseq_Handle bh =
01066                         scope->GetBioseqHandle(ret->x_GetRefSeqid(seg));
01067                     if ( bh ) {
01068                         mol = bh.GetSequenceType();
01069                         break;
01070                     }
01071                 }
01072             }
01073         }
01074         ret->m_Mol = mol;
01075     }
01076     return ret;
01077 }
01078 
01079 
01080 CConstRef<CSeqMap> CSeqMap::GetSeqMapForSeq_loc(const CSeq_loc& loc,
01081                                                 CScope* scope)
01082 {
01083     TMol mol = CSeq_inst::eMol_not_set;
01084     if ( scope ) {
01085         if ( loc.IsInt() ) {
01086             const CSeq_interval& locint = loc.GetInt();
01087             if ( locint.GetFrom() == 0 &&
01088                  (!locint.IsSetStrand() || IsForward(locint.GetStrand())) ) {
01089                 CBioseq_Handle bh = scope->GetBioseqHandle(locint.GetId());
01090                 if ( bh ) {
01091                     if ( bh.GetBioseqLength() == locint.GetTo()+1 ) {
01092                         return ConstRef(&bh.GetSeqMap());
01093                     }
01094                     mol = bh.GetSequenceType();
01095                 }
01096             }
01097         }
01098         else if ( loc.IsWhole() ) {
01099             CBioseq_Handle bh = scope->GetBioseqHandle(loc.GetWhole());
01100             if ( bh ) {
01101                 return ConstRef(&bh.GetSeqMap());
01102             }
01103         }
01104     }
01105     CRef<CSeqMap> ret(new CSeqMap(loc));
01106     if ( scope && ret->m_Mol == CSeq_inst::eMol_not_set ) {
01107         if ( mol == CSeq_inst::eMol_not_set ) {
01108             for ( size_t i = 1; ; ++i ) {
01109                 const CSegment& seg = ret->x_GetSegment(i);
01110                 if ( seg.m_SegType == eSeqEnd ) {
01111                     break;
01112                 }
01113                 else if ( seg.m_SegType == eSeqRef ) {
01114                     CBioseq_Handle bh =
01115                         scope->GetBioseqHandle(ret->x_GetRefSeqid(seg));
01116                     if ( bh ) {
01117                         mol = bh.GetSequenceType();
01118                         break;
01119                     }
01120                 }
01121             }
01122         }
01123         ret->m_Mol = mol;
01124     }
01125     return ret;
01126 }
01127 
01128 
01129 inline
01130 void CSeqMap::x_AddSegment(ESegmentType type,
01131                            TSeqPos len,
01132                            bool unknown_len)
01133 {
01134     m_Segments.push_back(CSegment(type, len, unknown_len));
01135 }
01136 
01137 
01138 void CSeqMap::x_AddSegment(ESegmentType type, TSeqPos len,
01139                            const CObject* object)
01140 {
01141     x_AddSegment(type, len);
01142     CSegment& ret = m_Segments.back();
01143     ret.m_RefObject.Reset(object);
01144 }
01145 
01146 
01147 void CSeqMap::x_AddSegment(ESegmentType type,
01148                            const CObject* object,
01149                            TSeqPos refPos,
01150                            TSeqPos len,
01151                            ENa_strand strand)
01152 {
01153     x_AddSegment(type, len, object);
01154     CSegment& ret = m_Segments.back();
01155     ret.m_RefPosition = refPos;
01156     ret.m_RefMinusStrand = IsReverse(strand);
01157 }
01158 
01159 
01160 void CSeqMap::x_AddEnd(void)
01161 {
01162     TSeqPos pos = kInvalidSeqPos;
01163     if ( m_Segments.empty() ) {
01164         m_Segments.reserve(3);
01165         pos = 0;
01166     }
01167     x_AddSegment(eSeqEnd, 0);
01168     CSegment& ret = m_Segments.back();
01169     ret.m_Position = pos;
01170 }
01171 
01172 
01173 void CSeqMap::x_AddGap(TSeqPos len, bool unknown_len)
01174 {
01175     x_AddSegment(eSeqGap, len, unknown_len);
01176 }
01177 
01178 
01179 void CSeqMap::x_AddGap(TSeqPos len, bool unknown_len,
01180                        const CSeq_data& gap_data)
01181 {
01182     x_AddSegment(eSeqGap, len, unknown_len);
01183     CSegment& ret = m_Segments.back();
01184     ret.m_ObjType = eSeqData;
01185     ret.m_RefObject = &gap_data;
01186 }
01187 
01188 
01189 void CSeqMap::x_AddUnloadedSeq_data(TSeqPos len)
01190 {
01191     x_AddSegment(eSeqData, len);
01192 }
01193 
01194 
01195 void CSeqMap::x_Add(const CSeq_data& data, TSeqPos len)
01196 {
01197     x_AddSegment(eSeqData, len, &data);
01198 }
01199 
01200 
01201 void CSeqMap::x_Add(const CSeq_point& ref)
01202 {
01203     x_AddSegment(eSeqRef, &ref.GetId(),
01204                  ref.GetPoint(), 1,
01205                  ref.IsSetStrand()? ref.GetStrand(): eNa_strand_unknown);
01206 }
01207 
01208 
01209 void CSeqMap::x_Add(const CSeq_interval& ref)
01210 {
01211     x_AddSegment(eSeqRef, &ref.GetId(),
01212                  ref.GetFrom(), ref.GetLength(),
01213                  ref.IsSetStrand()? ref.GetStrand(): eNa_strand_unknown);
01214 }
01215 
01216 
01217 void CSeqMap::x_Add(const CSeq_id& ref)
01218 {
01219     x_AddSegment(eSeqRef, &ref, 0, kInvalidSeqPos);
01220 }
01221 
01222 /*
01223 CSeqMap::CSegment& CSeqMap::x_Add(CSeqMap* submap)
01224 {
01225     return x_AddSegment(eSeqSubMap, kInvalidSeqPos, submap);
01226 }
01227 */
01228 
01229 void CSeqMap::x_Add(const CPacked_seqint& seq)
01230 {
01231     ITERATE ( CPacked_seqint::Tdata, it, seq.Get() ) {
01232         x_Add(**it);
01233     }
01234     //return x_Add(new CSeqMap_Seq_intervals(seq));
01235 }
01236 
01237 
01238 void CSeqMap::x_Add(const CPacked_seqpnt& seq)
01239 {
01240     const CSeq_id& id = seq.GetId();
01241     ENa_strand strand = seq.IsSetStrand()? seq.GetStrand(): eNa_strand_unknown;
01242     ITERATE ( CPacked_seqpnt::TPoints, it, seq.GetPoints() ) {
01243         x_AddSegment(eSeqRef, &id, *it, 1, strand);
01244     }
01245     //return x_Add(new CSeqMap_SeqPoss(seq));
01246 }
01247 
01248 
01249 void CSeqMap::x_Add(const CSeq_loc_mix& seq)
01250 {
01251     ITERATE ( CSeq_loc_mix::Tdata, it, seq.Get() ) {
01252         x_Add(**it);
01253     }
01254     //return x_Add(new CSeqMap_Seq_locs(seq, seq.Get()));
01255 }
01256 
01257 
01258 void CSeqMap::x_Add(const CSeq_loc_equiv& seq)
01259 {
01260     ITERATE ( CSeq_loc_equiv::Tdata, it, seq.Get() ) {
01261         x_Add(**it);
01262     }
01263     //return x_Add(new CSeqMap_Seq_locs(seq, seq.Get()));
01264 }
01265 
01266 
01267 void CSeqMap::x_Add(const CSeq_literal& seq)
01268 {
01269     if ( !seq.IsSetSeq_data() ) {
01270         // No data exist - treat it like a gap
01271         x_AddGap(seq.GetLength(), seq.CanGetFuzz()); //???
01272     }
01273     else if ( seq.GetSeq_data().IsGap() ) {
01274         // Seq-data.gap
01275         x_AddGap(seq.GetLength(), seq.CanGetFuzz(), seq.GetSeq_data());
01276     }
01277     else {
01278         x_Add(seq.GetSeq_data(), seq.GetLength());
01279     }
01280 }
01281 
01282 
01283 void CSeqMap::x_Add(const CSeq_loc& loc)
01284 {
01285     switch ( loc.Which() ) {
01286     case CSeq_loc::e_not_set:
01287     case CSeq_loc::e_Null:
01288     case CSeq_loc::e_Empty:
01289         x_AddGap(0, false); // Add gap ???
01290         break;
01291     case CSeq_loc::e_Whole:
01292         x_Add(loc.GetWhole());
01293         break;
01294     case CSeq_loc::e_Int:
01295         x_Add(loc.GetInt());
01296         break;
01297     case CSeq_loc::e_Pnt:
01298         x_Add(loc.GetPnt());
01299         break;
01300     case CSeq_loc::e_Packed_int:
01301         x_Add(loc.GetPacked_int());
01302         break;
01303     case CSeq_loc::e_Packed_pnt:
01304         x_Add(loc.GetPacked_pnt());
01305         break;
01306     case CSeq_loc::e_Mix:
01307         x_Add(loc.GetMix());
01308         break;
01309     case CSeq_loc::e_Equiv:
01310         x_Add(loc.GetEquiv());
01311         break;
01312     case CSeq_loc::e_Bond:
01313         NCBI_THROW(CSeqMapException, eDataError,
01314                    "e_Bond is not allowed as a reference type");
01315     case CSeq_loc::e_Feat:
01316         NCBI_THROW(CSeqMapException, eDataError,
01317                    "e_Feat is not allowed as a reference type");
01318     default:
01319         NCBI_THROW(CSeqMapException, eDataError,
01320                    "invalid reference type");
01321     }
01322 }
01323 
01324 
01325 void CSeqMap::x_Add(const CDelta_seq& seq)
01326 {
01327     switch ( seq.Which() ) {
01328     case CDelta_seq::e_Loc:
01329         x_Add(seq.GetLoc());
01330         break;
01331     case CDelta_seq::e_Literal:
01332         x_Add(seq.GetLiteral());
01333         break;
01334     default:
01335         NCBI_THROW(CSeqMapException, eDataError,
01336                    "Can not add empty Delta-seq");
01337     }
01338 }
01339 
01340 
01341 void CSeqMap::SetRegionInChunk(CTSE_Chunk_Info& chunk,
01342                                TSeqPos pos, TSeqPos length)
01343 {
01344     if ( length == kInvalidSeqPos ) {
01345         _ASSERT(pos == 0);
01346         _ASSERT(m_SeqLength != kInvalidSeqPos);
01347         length = m_SeqLength;
01348     }
01349     size_t index = x_FindSegment(pos, 0);
01350     CMutexGuard guard(m_SeqMap_Mtx);
01351     while ( length ) {
01352         // get segment
01353         if ( index > x_GetLastEndSegmentIndex() ) {
01354             x_GetSegmentException(index);
01355         }
01356         CSegment& seg = x_SetSegment(index);
01357 
01358         // update segment position if not set yet
01359         if ( index > m_Resolved ) {
01360             _ASSERT(index == m_Resolved + 1);
01361             _ASSERT(seg.m_Position == kInvalidSeqPos || seg.m_Position == pos);
01362             seg.m_Position = pos;
01363             m_Resolved = index;
01364         }
01365         // check segment
01366         if ( seg.m_Position != pos || seg.m_Length > length ) {
01367             NCBI_THROW(CSeqMapException, eDataError,
01368                        "SeqMap segment crosses split chunk boundary");
01369         }
01370         if ( seg.m_SegType != eSeqGap ) {
01371             NCBI_THROW(CSeqMapException, eDataError,
01372                        "split chunk covers bad SeqMap segment");
01373         }
01374         _ASSERT(!seg.m_RefObject);
01375 
01376         if ( seg.m_Length > 0 ) {
01377             // update segment
01378             seg.m_SegType = eSeqData;
01379             x_SetChunk(seg, chunk);
01380             
01381             // next
01382             pos += seg.m_Length;
01383             length -= seg.m_Length;
01384         }
01385         ++index;
01386     }
01387 }
01388 
01389 
01390 bool CSeqMap::x_DoUpdateSeq_inst(CSeq_inst& inst)
01391 {
01392     inst.SetLength(GetLength(0));
01393     bool single_segment = GetSegmentsCount() == 1;
01394     if ( HasSegmentOfType(eSeqData) ) {
01395         if ( single_segment && !inst.IsSetExt() ) {
01396             // seq-data
01397             CSegment& seg = x_SetSegment(x_GetFirstEndSegmentIndex() + 1);
01398             _ASSERT(seg.m_SegType == eSeqData);
01399             inst.SetSeq_data(const_cast<CSeq_data&>(x_GetSeq_data(seg)));
01400             inst.ResetExt();
01401             return true;
01402         }
01403     }
01404     else if ( HasSegmentOfType(eSeqGap) ) {
01405         if ( single_segment && !inst.IsSetExt() ) {
01406             inst.SetRepr(CSeq_inst::eRepr_virtual);
01407             inst.ResetSeq_data();
01408             inst.ResetExt();
01409             return true;
01410         }
01411     }
01412     else {
01413         if ( !inst.IsSetExt() || inst.GetExt().IsSeg() ) {
01414             // ref only -> CSeg_ext
01415             CSeg_ext::Tdata& data = inst.SetExt().SetSeg().Set();
01416             CSeg_ext::Tdata::iterator iter = data.begin();
01417             for ( size_t index = x_GetFirstEndSegmentIndex() + 1;
01418                   index < x_GetLastEndSegmentIndex(); ++index ) {
01419                 CSegment& seg = x_SetSegment(index);
01420                 _ASSERT(seg.m_SegType == eSeqRef);
01421                 if ( iter == data.end() ) {
01422                     iter = data.insert(iter, CSeg_ext::Tdata::value_type());
01423                 }
01424                 if ( !*iter ) {
01425                     iter->Reset(new CSeq_loc);
01426                 }
01427                 CSeq_loc& loc = **iter;
01428                 ++iter;
01429                 CSeq_interval& interval = loc.SetInt();
01430                 interval.SetId(const_cast<CSeq_id&>(x_GetRefSeqid(seg)));
01431                 TSeqPos pos = seg.m_RefPosition;
01432                 interval.SetFrom(pos);
01433                 interval.SetTo(pos+x_GetSegmentLength(index, 0)-1);
01434                 if ( seg.m_RefMinusStrand ) {
01435                     interval.SetStrand(eNa_strand_minus);
01436                 }
01437                 else {
01438                     interval.ResetStrand();
01439                 }
01440                 interval.ResetFuzz_from();
01441                 interval.ResetFuzz_to();
01442             }
01443             data.erase(iter, data.end());
01444             return true;
01445         }
01446     }
01447 
01448     // delta
01449     CDelta_ext::Tdata& delta = inst.SetExt().SetDelta().Set();
01450     CDelta_ext::Tdata::iterator iter = delta.begin();
01451     for ( size_t index = x_GetFirstEndSegmentIndex() + 1;
01452           index < x_GetLastEndSegmentIndex(); ++index ) {
01453         CSegment& seg = x_SetSegment(index);
01454         if ( iter == delta.end() ) {
01455             iter = delta.insert(iter, CDelta_ext::Tdata::value_type());
01456         }
01457         if ( !*iter ) {
01458             iter->Reset(new CDelta_seq);
01459         }
01460         CDelta_seq& dseq = **iter;
01461         ++iter;
01462         if ( seg.m_SegType == eSeqData ) {
01463             CSeq_literal& lit = dseq.SetLiteral();
01464             lit.SetLength(x_GetSegmentLength(index, 0));
01465             lit.SetSeq_data(const_cast<CSeq_data&>(x_GetSeq_data(seg)));
01466             lit.ResetFuzz();
01467         }
01468         else if ( seg.m_SegType == eSeqGap ) {
01469             CSeq_literal& lit = dseq.SetLiteral();
01470             lit.SetLength(x_GetSegmentLength(index, 0));
01471             lit.ResetSeq_data();
01472             lit.ResetFuzz();
01473         }
01474         else {
01475             _ASSERT(seg.m_SegType == eSeqRef);
01476             CSeq_loc& loc = dseq.SetLoc();
01477             CSeq_interval& interval = loc.SetInt();
01478             interval.SetId(const_cast<CSeq_id&>(x_GetRefSeqid(seg)));
01479             TSeqPos pos = seg.m_RefPosition;
01480             interval.SetFrom(pos);
01481             interval.SetTo(pos+x_GetSegmentLength(index, 0)-1);
01482             if ( seg.m_RefMinusStrand ) {
01483                 interval.SetStrand(eNa_strand_minus);
01484             }
01485             else {
01486                 interval.ResetStrand();
01487             }
01488             interval.ResetFuzz_from();
01489             interval.ResetFuzz_to();
01490         }
01491     }
01492     delta.erase(iter, delta.end());
01493     return true;
01494 }
01495 
01496 
01497 void CSeqMap::SetRepr(CSeq_inst::TRepr repr)
01498 {
01499 }
01500 
01501 
01502 void CSeqMap::ResetRepr(void)
01503 {
01504 }
01505 
01506 
01507 void CSeqMap::SetMol(CSeq_inst::TMol mol)
01508 {
01509     m_Mol = mol;
01510 }
01511 
01512 
01513 void CSeqMap::ResetMol(void)
01514 {
01515     m_Mol = CSeq_inst::eMol_not_set;
01516 }
01517 
01518 
01519 END_SCOPE(objects)
01520 END_NCBI_SCOPE
01521 
01522 

Generated on Wed Dec 9 04:27:22 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:18:00 2009 by modify_doxy.py rev. 173732