|
NCBI C++ ToolKit
|
00001 /* $Id: sparse_functions.cpp 23970 2011-06-27 18:15:09Z kuznets $ 00002 * =========================================================================== 00003 * 00004 * PUBLIC DOMAIN NOTICE 00005 * National Center for Biotechnology Information 00006 * 00007 * This software/database is a "United States Government Work" under the 00008 * terms of the United States Copyright Act. It was written as part of 00009 * the author's official duties as a United States Government employee and 00010 * thus cannot be copyrighted. This software/database is freely available 00011 * to the public for use. The National Library of Medicine and the U.S. 00012 * Government have not placed any restriction on its use or reproduction. 00013 * 00014 * Although all reasonable efforts have been taken to ensure the accuracy 00015 * and reliability of the software and data, the NLM and the U.S. 00016 * Government do not and cannot warrant the performance or results that 00017 * may be obtained by using this software or data. The NLM and the U.S. 00018 * Government disclaim all warranties, express or implied, including 00019 * warranties of performance, merchantability or fitness for any particular 00020 * purpose. 00021 * 00022 * Please cite the author in any work or product based on this material. 00023 * 00024 * =========================================================================== 00025 * 00026 * Authors: Andrey Yazhuk 00027 * 00028 * File Description: 00029 * 00030 */ 00031 00032 #include <ncbi_pch.hpp> 00033 00034 #include <gui/widgets/data/sparse_functions.hpp> 00035 00036 #include <corelib/ncbitime.hpp> 00037 #include <objects/seqalign/Sparse_align.hpp> 00038 #include <objects/seqalign/Seq_align.hpp> 00039 #include <objects/seqloc/Seq_id.hpp> 00040 00041 00042 BEGIN_NCBI_SCOPE 00043 USING_SCOPE(ncbi::objects); 00044 00045 00046 // Conversion function CSparse_align -> SAlignedSeq 00047 SAlignedSeq* CreateAlignRow(const CSparse_align& align, bool master_first) 00048 { 00049 auto_ptr<SAlignedSeq> aln_seq(new SAlignedSeq()); 00050 aln_seq->m_SeqId.Reset(master_first ? &align.GetSecond_id() 00051 : &align.GetFirst_id()); 00052 SAlignedSeq::TSignedRange& range = aln_seq->m_SecondRange; 00053 00054 // get references to the containers inside CSparse_align 00055 const CSparse_align::TFirst_starts& starts_1 = align.GetFirst_starts(); 00056 const CSparse_align::TSecond_starts& starts_2 = align.GetSecond_starts(); 00057 const CSparse_align::TLens& lens = align.GetLens(); 00058 const CSparse_align::TSecond_strands* strands = 00059 align.IsSetSecond_strands() ? &align.GetSecond_strands() : 0; 00060 00061 // create a new Align Collection 00062 SAlignedSeq::TAlignColl* coll = new SAlignedSeq::TAlignColl(); 00063 range.SetFrom(0).SetLength(0); 00064 SAlignedSeq::TPos aln_from = -1, from = -1; 00065 00066 // iterate on Sparse-seg elements 00067 typedef CSparse_align::TNumseg TNumseg; 00068 for( TNumseg i = 0; i < align.GetNumseg(); i++ ) { 00069 aln_from = master_first ? starts_1[i] : starts_2[i]; 00070 from = master_first ? starts_2[i] : starts_1[i]; 00071 SAlignedSeq::TPos len = lens[i]; 00072 bool dir = strands ? ((*strands)[i] == eNa_strand_plus) : true; 00073 00074 // update range 00075 if(coll->empty()) { 00076 range.SetFrom(aln_from); 00077 range.SetLength(len); 00078 } else { 00079 range.SetFrom(min(range.GetFrom(), aln_from)); 00080 range.SetToOpen(max(range.GetToOpen(), aln_from + len)); 00081 } 00082 00083 coll->insert(SAlignedSeq::TAlignRange(aln_from, from, len, dir)); 00084 } 00085 aln_seq->m_AlignColl = coll; 00086 00087 int dir = (coll->GetFlags() & SAlignedSeq::TAlignColl::fMixedDir); 00088 if(dir == SAlignedSeq::TAlignColl::fMixedDir) { 00089 // incorrect - do not return anything 00090 return NULL; 00091 } else if(dir == SAlignedSeq::TAlignColl::fReversed) { 00092 aln_seq->m_NegativeStrand = true; 00093 } 00094 return aln_seq.release(); 00095 } 00096 00097 /// Converter 00098 bool ConvertToPairwise(const CSeq_align& align, 00099 const CSeq_id& master_id, 00100 vector<SAlignedSeq*>& aln_seqs) 00101 { 00102 typedef CSeq_align::TSegs TSegs; 00103 const TSegs& segs = align.GetSegs(); 00104 00105 switch(segs.Which()) { 00106 case TSegs::e_Denseg: { 00107 const CDense_seg& dense_seg = segs.GetDenseg(); 00108 // find the row corresponding to master_id 00109 const CDense_seg::TIds& ids = dense_seg.GetIds(); 00110 for( CDense_seg::TDim row = 0; row < dense_seg.GetDim(); row++ ) { 00111 if(ids[row]->Equals(master_id)) { 00112 return ConvertToPairwise(dense_seg, row, aln_seqs); 00113 } 00114 } 00115 return false; 00116 } 00117 /// add code to support other types of Segs 00118 default: 00119 return false; 00120 } 00121 } 00122 00123 00124 /// Converter 00125 bool ConvertToPairwise(const vector< CConstRef<objects::CSeq_align> >& aligns, 00126 const CSeq_id& master_id, 00127 vector<SAlignedSeq*>& aln_seqs) 00128 { 00129 CStopWatch timer; 00130 timer.Start(); 00131 00132 bool ok = false; 00133 for( size_t i = 0; i < aligns.size(); i++ ) { 00134 const CSeq_align& align = *aligns[i]; 00135 bool res = ConvertToPairwise(align, master_id, aln_seqs); 00136 ok |= res; 00137 } 00138 LOG_POST("ConvertToPairwise( vector of CSeq_align) " << 1000 * timer.Elapsed() << " ms"); 00139 return ok; 00140 } 00141 00142 00143 /// Converter CSparse_seg -> SAlignedSeq-s 00144 bool ConvertToPairwise(const CSparse_seg& sparse_seg, vector<SAlignedSeq*>& aln_seqs) 00145 { 00146 CConstRef<objects::CSeq_id> master_id(&sparse_seg.GetMaster_id()); 00147 00148 typedef CSparse_seg::TRows TRows; 00149 const TRows& rows = sparse_seg.GetRows(); 00150 TRows::const_iterator it = rows.begin(); 00151 00152 // convert pairwise alignment to TAlignColl objects 00153 for( ; it != rows.end(); ++it ) { 00154 const CSparse_align& align = **it; 00155 00156 int master_index = -1; 00157 if(master_id->Compare(align.GetFirst_id()) == CSeq_id::e_YES) { 00158 master_index = 0; 00159 } else if(master_id->Compare(align.GetSecond_id()) == CSeq_id::e_YES) { 00160 master_index = 1; 00161 } 00162 00163 if(master_index != -1) { // create an alignment row from this CSparse_align 00164 SAlignedSeq* aln_seq = CreateAlignRow(align, master_index == 0); 00165 if(aln_seq) { 00166 aln_seqs.push_back(aln_seq); 00167 } 00168 } else { 00169 LOG_POST(Error << "CreateAlignRow() - a CSparse_align is" 00170 << "invalid, neither of its CSeq_ids match master id"); 00171 } 00172 } 00173 return true; // handle errors 00174 } 00175 00176 00177 bool ConvertToPairwise(const CDense_seg& dense_seg, 00178 CDense_seg::TDim anchor_row, 00179 vector<SAlignedSeq*>& aln_seqs) 00180 { 00181 typedef CDense_seg::TDim TDim; 00182 00183 for(TDim row = 0; row < dense_seg.GetDim(); row++ ) { 00184 if(row != anchor_row) { 00185 SAlignedSeq* aln_seq = CreateAlignRow(dense_seg, anchor_row, row); 00186 if(aln_seq) { 00187 aln_seqs.push_back(aln_seq); 00188 } 00189 } 00190 } 00191 return true; 00192 } 00193 00194 00195 /// Builder function 00196 CSparseAlignment* BuildSparseAlignment(const CSeq_id& master_id, 00197 vector<SAlignedSeq*>& aln_seqs, 00198 objects::CScope& scope) 00199 { 00200 if(! aln_seqs.empty()) { 00201 CSparseAlignment* aln = new CSparseAlignment(); 00202 aln->Init(master_id, aln_seqs, scope); 00203 return aln; 00204 } 00205 return NULL; 00206 } 00207 00208 00209 /// Builder function 00210 CAlnVec* BuildDenseAlignment(const CSeq_id& master_id, 00211 vector<SAlignedSeq*>& aln_seqs, 00212 objects::CScope& scope) 00213 { 00214 if(! aln_seqs.empty()) { 00215 } 00216 return NULL; 00217 } 00218 00219 00220 /// Converter Helper function 00221 /// Creates an Align Collection from the two rows of a CDense_seg 00222 SAlignedSeq* CreateAlignRow(const CDense_seg& dense_seg, 00223 CDense_seg::TDim row_1, 00224 CDense_seg::TDim row_2) 00225 { 00226 _ASSERT(row_1 >=0 && row_1 < dense_seg.GetDim()); 00227 _ASSERT(row_2 >=0 && row_2 < dense_seg.GetDim()); 00228 00229 auto_ptr<SAlignedSeq> aln_seq(new SAlignedSeq()); 00230 aln_seq->m_SeqId.Reset(dense_seg.GetIds()[row_2]); 00231 SAlignedSeq::TSignedRange& range = aln_seq->m_SecondRange; 00232 00233 aln_seq->m_AlignColl = new SAlignedSeq::TAlignColl(); 00234 SAlignedSeq::TAlignColl& coll = *aln_seq->m_AlignColl; 00235 00236 typedef CDense_seg::TDim TDim; 00237 typedef CDense_seg::TNumseg TNum; 00238 00239 const CDense_seg::TStarts& starts = dense_seg.GetStarts(); 00240 const CDense_seg::TLens& lens = dense_seg.GetLens(); 00241 const CDense_seg::TStrands* strands = 00242 dense_seg.IsSetStrands() ? &dense_seg.GetStrands() : NULL; 00243 00244 // iterate by segements and add aligned segments to the collection 00245 TDim n_rows = dense_seg.GetDim(); 00246 TNum n_seg = dense_seg.GetNumseg(); 00247 for( TNum i = 0; i < n_seg; i++ ) { 00248 int offset = i * n_rows; 00249 int from_1 = starts[row_1 + offset]; 00250 int from_2 = starts[row_2 + offset]; 00251 00252 if(from_1 != -1 && from_2 != -1) { // not a gap 00253 int len = lens[i]; 00254 bool direct = true; 00255 if(strands) { 00256 bool minus_1 = (*strands)[row_1 + offset] == eNa_strand_minus; 00257 bool minus_2 = (*strands)[row_2 + offset] == eNa_strand_minus; 00258 direct = (! minus_1 && ! minus_2) || (minus_1 == minus_2); 00259 } 00260 coll.insert(SAlignTools::TAlignRange(from_1, from_2, len, direct)); 00261 00262 // update range 00263 if(coll.empty()) { 00264 range.SetFrom(from_1); 00265 range.SetLength(len); 00266 } else { 00267 range.SetFrom(min(range.GetFrom(), from_1)); 00268 range.SetToOpen(max(range.GetToOpen(), from_1 + len)); 00269 } 00270 } 00271 } 00272 //LOG_POST("GetAlignColl() rows [" << row_1 << ", " << row_2 << "]" << ", segments " << coll.size()); 00273 00274 _ASSERT((coll.GetFlags() & SAlignTools::TAlignColl::fInvalid) == 0); 00275 return aln_seq.release(); 00276 } 00277 00278 00279 /// Creates Align Collection from a CSparse_seg 00280 void GetAlignColl(const CSparse_align& sparse_align, 00281 const CSeq_id& master_id, 00282 SAlignTools::TAlignColl& coll) 00283 { 00284 coll.clear(); 00285 00286 int index = -1; 00287 if(master_id.Compare(sparse_align.GetFirst_id()) == CSeq_id::e_YES) { 00288 index = 0; 00289 } else if(master_id.Compare(sparse_align.GetSecond_id()) == CSeq_id::e_YES) { 00290 index = 1; 00291 } 00292 if(index != -1) { 00293 bool first = (index == 0); 00294 const CSparse_align::TFirst_starts& starts_1 = sparse_align.GetFirst_starts(); 00295 const CSparse_align::TFirst_starts& starts_2 = sparse_align.GetSecond_starts(); 00296 const CSparse_align::TLens& lens = sparse_align.GetLens(); 00297 const CSparse_align::TSecond_strands* strands = 00298 sparse_align.IsSetSecond_strands() ? &sparse_align.GetSecond_strands() : 0; 00299 00300 typedef CSparse_align::TNumseg TNumseg; 00301 TNumseg n_seg = sparse_align.GetNumseg(); 00302 for( TNumseg i = 0; i < n_seg; i++ ) { 00303 int from_1 = first ? starts_1[i] : starts_2[i]; 00304 int from_2 = first ? starts_2[i] : starts_1[i]; 00305 int len = lens[i]; 00306 bool direct = strands && ((*strands)[i] == eNa_strand_minus); 00307 00308 coll.insert(SAlignTools::TAlignRange(from_1, from_2, len, direct)); 00309 } 00310 } 00311 } 00312 00313 00314 /// Reverse Converter 00315 /// Converts Align Collection into a CSparse_align 00316 CRef<CSparse_align> CreateSparseAlign(const CSeq_id& id_1, 00317 const CSeq_id& id_2, 00318 const SAlignTools::TAlignColl& coll) 00319 { 00320 CRef<CSparse_align> align(new CSparse_align()); 00321 00322 CRef<CSeq_id> rid_1(new CSeq_id()); 00323 rid_1->Assign(id_1); 00324 align->SetFirst_id(*rid_1); 00325 00326 CRef<CSeq_id> rid_2(new CSeq_id()); 00327 rid_2->Assign(id_2); 00328 align->SetSecond_id(*rid_2); 00329 00330 // initilize containers 00331 typedef CSparse_align::TNumseg TNumseg; 00332 TNumseg n_seg = (TNumseg)coll.size(); 00333 align->SetNumseg(n_seg); 00334 00335 CSparse_align::TFirst_starts& starts_1 = align->SetFirst_starts(); 00336 starts_1.resize(n_seg); 00337 CSparse_align::TFirst_starts& starts_2 = align->SetSecond_starts(); 00338 starts_2.resize(n_seg); 00339 CSparse_align::TLens& lens = align->SetLens(); 00340 lens.resize(n_seg); 00341 00342 CSparse_align::TSecond_strands* strands = NULL; 00343 if(coll.GetFlags() & SAlignTools::TAlignColl::fReversed) { 00344 // there are reversed segments in the collection - need to fill "Strands" 00345 strands = &align->SetSecond_strands(); 00346 strands->resize(n_seg); 00347 } 00348 00349 // move data to the containers 00350 TNumseg i = 0; 00351 ITERATE(SAlignTools::TAlignColl, it, coll) { 00352 const SAlignTools::TAlignRange& r = *it; 00353 00354 starts_1[i] = r.GetFirstFrom(); 00355 starts_2[i] = r.GetSecondFrom(); 00356 lens[i] = r.GetLength(); 00357 if(strands) { 00358 (*strands)[i] = r.IsDirect() ? eNa_strand_plus : eNa_strand_minus; 00359 } 00360 i++; 00361 } 00362 00363 return align; 00364 } 00365 00366 00367 END_NCBI_SCOPE
1.7.5.1
Modified on Wed May 23 13:23:09 2012 by modify_doxy.py rev. 337098