NCBI C++ ToolKit
sparse_functions.cpp
Go to the documentation of this file.
00001 /*  $Id: sparse_functions.cpp 23970 2011-06-27 18:15:09Z kuznets $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Authors:  Andrey Yazhuk
00027  *
00028  * File Description:
00029  *
00030  */
00031 
00032 #include <ncbi_pch.hpp>
00033 
00034 #include <gui/widgets/data/sparse_functions.hpp>
00035 
00036 #include <corelib/ncbitime.hpp>
00037 #include <objects/seqalign/Sparse_align.hpp>
00038 #include <objects/seqalign/Seq_align.hpp>
00039 #include <objects/seqloc/Seq_id.hpp>
00040 
00041 
00042 BEGIN_NCBI_SCOPE
00043 USING_SCOPE(ncbi::objects);
00044 
00045 
00046 // Conversion function CSparse_align -> SAlignedSeq
00047 SAlignedSeq* CreateAlignRow(const CSparse_align& align, bool master_first)
00048 {
00049     auto_ptr<SAlignedSeq> aln_seq(new SAlignedSeq());
00050     aln_seq->m_SeqId.Reset(master_first ? &align.GetSecond_id()
00051                                         : &align.GetFirst_id());
00052     SAlignedSeq::TSignedRange& range = aln_seq->m_SecondRange;
00053 
00054     // get references to the containers inside CSparse_align
00055     const CSparse_align::TFirst_starts& starts_1 = align.GetFirst_starts();
00056     const CSparse_align::TSecond_starts& starts_2 = align.GetSecond_starts();
00057     const CSparse_align::TLens& lens = align.GetLens();
00058     const CSparse_align::TSecond_strands* strands =
00059         align.IsSetSecond_strands() ? &align.GetSecond_strands() : 0;
00060 
00061     // create a new Align Collection
00062     SAlignedSeq::TAlignColl* coll = new SAlignedSeq::TAlignColl();
00063     range.SetFrom(0).SetLength(0);
00064     SAlignedSeq::TPos aln_from = -1, from = -1;
00065 
00066     // iterate on Sparse-seg elements
00067     typedef CSparse_align::TNumseg TNumseg;
00068     for( TNumseg i = 0;  i < align.GetNumseg(); i++  )  {
00069         aln_from = master_first ? starts_1[i] : starts_2[i];
00070         from = master_first ? starts_2[i] : starts_1[i];
00071         SAlignedSeq::TPos len = lens[i];
00072         bool dir = strands ? ((*strands)[i] == eNa_strand_plus) : true;
00073 
00074         // update range
00075         if(coll->empty())    {
00076             range.SetFrom(aln_from);
00077             range.SetLength(len);
00078         } else {
00079             range.SetFrom(min(range.GetFrom(), aln_from));
00080             range.SetToOpen(max(range.GetToOpen(), aln_from + len));
00081         }
00082 
00083         coll->insert(SAlignedSeq::TAlignRange(aln_from, from, len, dir));
00084     }
00085     aln_seq->m_AlignColl = coll;
00086 
00087     int dir = (coll->GetFlags() & SAlignedSeq::TAlignColl::fMixedDir);
00088     if(dir == SAlignedSeq::TAlignColl::fMixedDir)    {
00089         // incorrect - do not return anything
00090         return NULL;
00091     } else if(dir == SAlignedSeq::TAlignColl::fReversed) {
00092         aln_seq->m_NegativeStrand = true;
00093     }
00094     return aln_seq.release();
00095 }
00096 
00097 /// Converter
00098 bool ConvertToPairwise(const CSeq_align& align,
00099                        const CSeq_id& master_id,
00100                        vector<SAlignedSeq*>& aln_seqs)
00101 {
00102     typedef CSeq_align::TSegs TSegs;
00103     const TSegs& segs = align.GetSegs();
00104 
00105     switch(segs.Which())    {
00106     case TSegs::e_Denseg: {
00107         const CDense_seg& dense_seg = segs.GetDenseg();
00108         // find the row corresponding to master_id
00109         const CDense_seg::TIds& ids = dense_seg.GetIds();
00110         for( CDense_seg::TDim row = 0;  row < dense_seg.GetDim();  row++ )  {
00111             if(ids[row]->Equals(master_id)) {
00112                 return ConvertToPairwise(dense_seg, row, aln_seqs);
00113             }
00114         }
00115         return false;
00116     }
00117     /// add code to support other types of Segs
00118     default:
00119         return false;
00120     }
00121 }
00122 
00123 
00124 /// Converter
00125 bool ConvertToPairwise(const vector< CConstRef<objects::CSeq_align> >& aligns,
00126                        const CSeq_id& master_id,
00127                        vector<SAlignedSeq*>& aln_seqs)
00128 {
00129     CStopWatch timer;
00130     timer.Start();
00131 
00132     bool ok = false;
00133     for( size_t i = 0;  i < aligns.size();  i++ )   {
00134         const CSeq_align& align = *aligns[i];
00135         bool res = ConvertToPairwise(align, master_id, aln_seqs);
00136         ok |= res;
00137     }
00138     LOG_POST("ConvertToPairwise( vector of CSeq_align) " << 1000 * timer.Elapsed() << " ms");
00139     return ok;
00140 }
00141 
00142 
00143 /// Converter CSparse_seg -> SAlignedSeq-s
00144 bool ConvertToPairwise(const CSparse_seg& sparse_seg, vector<SAlignedSeq*>& aln_seqs)
00145 {
00146     CConstRef<objects::CSeq_id> master_id(&sparse_seg.GetMaster_id());
00147 
00148     typedef CSparse_seg::TRows  TRows;
00149     const TRows& rows = sparse_seg.GetRows();
00150     TRows::const_iterator it = rows.begin();
00151 
00152     // convert pairwise alignment to TAlignColl objects
00153     for(  ;  it != rows.end();  ++it    )   {
00154         const CSparse_align& align = **it;
00155 
00156         int master_index = -1;
00157         if(master_id->Compare(align.GetFirst_id()) == CSeq_id::e_YES) {
00158             master_index = 0;
00159         } else if(master_id->Compare(align.GetSecond_id()) == CSeq_id::e_YES) {
00160             master_index = 1;
00161         }
00162 
00163         if(master_index != -1)  {   // create an alignment row from this CSparse_align
00164             SAlignedSeq* aln_seq = CreateAlignRow(align, master_index == 0);
00165             if(aln_seq) {
00166                 aln_seqs.push_back(aln_seq);
00167             }
00168         } else {
00169             LOG_POST(Error << "CreateAlignRow() - a CSparse_align is"
00170                      << "invalid, neither of its CSeq_ids match master id");
00171         }
00172     }
00173     return true; // handle errors
00174 }
00175 
00176 
00177 bool ConvertToPairwise(const CDense_seg& dense_seg,
00178                        CDense_seg::TDim anchor_row,
00179                        vector<SAlignedSeq*>& aln_seqs)
00180 {
00181     typedef CDense_seg::TDim    TDim;
00182 
00183     for(TDim row = 0;  row < dense_seg.GetDim();  row++ )   {
00184         if(row != anchor_row)   {
00185             SAlignedSeq* aln_seq = CreateAlignRow(dense_seg, anchor_row, row);
00186             if(aln_seq) {
00187                 aln_seqs.push_back(aln_seq);
00188             }
00189         }
00190     }
00191     return true;
00192 }
00193 
00194 
00195 /// Builder function
00196 CSparseAlignment* BuildSparseAlignment(const CSeq_id& master_id,
00197                                        vector<SAlignedSeq*>& aln_seqs,
00198                                        objects::CScope& scope)
00199 {
00200     if(! aln_seqs.empty()) {
00201         CSparseAlignment* aln = new CSparseAlignment();
00202         aln->Init(master_id, aln_seqs, scope);
00203         return aln;
00204     }
00205     return NULL;
00206 }
00207 
00208 
00209 /// Builder function
00210 CAlnVec* BuildDenseAlignment(const CSeq_id& master_id,
00211                              vector<SAlignedSeq*>& aln_seqs,
00212                              objects::CScope& scope)
00213 {
00214     if(! aln_seqs.empty()) {
00215     }
00216     return NULL;
00217 }
00218 
00219 
00220 /// Converter Helper function
00221 /// Creates an Align Collection from the two rows of a CDense_seg
00222 SAlignedSeq*  CreateAlignRow(const CDense_seg& dense_seg,
00223                                 CDense_seg::TDim row_1,
00224                                 CDense_seg::TDim row_2)
00225 {
00226     _ASSERT(row_1 >=0  &&  row_1 < dense_seg.GetDim());
00227     _ASSERT(row_2 >=0  &&  row_2 < dense_seg.GetDim());
00228 
00229     auto_ptr<SAlignedSeq> aln_seq(new SAlignedSeq());
00230     aln_seq->m_SeqId.Reset(dense_seg.GetIds()[row_2]);
00231     SAlignedSeq::TSignedRange& range = aln_seq->m_SecondRange;
00232 
00233     aln_seq->m_AlignColl = new SAlignedSeq::TAlignColl();
00234     SAlignedSeq::TAlignColl& coll = *aln_seq->m_AlignColl;
00235 
00236     typedef CDense_seg::TDim TDim;
00237     typedef CDense_seg::TNumseg TNum;
00238 
00239     const CDense_seg::TStarts& starts = dense_seg.GetStarts();
00240     const CDense_seg::TLens& lens = dense_seg.GetLens();
00241     const CDense_seg::TStrands* strands =
00242         dense_seg.IsSetStrands() ? &dense_seg.GetStrands() : NULL;
00243 
00244     // iterate by segements and add aligned segments to the collection
00245     TDim n_rows = dense_seg.GetDim();
00246     TNum n_seg = dense_seg.GetNumseg();
00247     for( TNum i = 0; i < n_seg;  i++ )  {
00248         int offset = i * n_rows;
00249         int from_1 = starts[row_1 + offset];
00250         int from_2 = starts[row_2 + offset];
00251 
00252         if(from_1 != -1  &&  from_2 != -1)  { // not a gap
00253             int len = lens[i];
00254             bool direct = true;
00255             if(strands) {
00256                 bool minus_1 = (*strands)[row_1 + offset] == eNa_strand_minus;
00257                 bool minus_2 = (*strands)[row_2 + offset] == eNa_strand_minus;
00258                 direct = (! minus_1  &&  ! minus_2)  ||  (minus_1 == minus_2);
00259             }
00260             coll.insert(SAlignTools::TAlignRange(from_1, from_2, len, direct));
00261 
00262             // update range
00263             if(coll.empty())    {
00264                 range.SetFrom(from_1);
00265                 range.SetLength(len);
00266             } else {
00267                 range.SetFrom(min(range.GetFrom(), from_1));
00268                 range.SetToOpen(max(range.GetToOpen(), from_1 + len));
00269             }
00270         }
00271     }
00272     //LOG_POST("GetAlignColl() rows [" << row_1 << ", " << row_2 << "]" << ",  segments " << coll.size());
00273 
00274     _ASSERT((coll.GetFlags() & SAlignTools::TAlignColl::fInvalid) == 0);
00275     return aln_seq.release();
00276 }
00277 
00278 
00279 /// Creates Align Collection from a CSparse_seg
00280 void GetAlignColl(const CSparse_align& sparse_align,
00281                   const CSeq_id& master_id,
00282                   SAlignTools::TAlignColl& coll)
00283 {
00284     coll.clear();
00285 
00286     int index = -1;
00287     if(master_id.Compare(sparse_align.GetFirst_id()) == CSeq_id::e_YES) {
00288         index = 0;
00289     } else if(master_id.Compare(sparse_align.GetSecond_id()) == CSeq_id::e_YES) {
00290         index = 1;
00291     }
00292     if(index != -1) {
00293         bool first = (index == 0);
00294         const CSparse_align::TFirst_starts& starts_1 = sparse_align.GetFirst_starts();
00295         const CSparse_align::TFirst_starts& starts_2 = sparse_align.GetSecond_starts();
00296         const CSparse_align::TLens& lens = sparse_align.GetLens();
00297         const CSparse_align::TSecond_strands* strands =
00298             sparse_align.IsSetSecond_strands() ? &sparse_align.GetSecond_strands() : 0;
00299 
00300         typedef CSparse_align::TNumseg TNumseg;
00301         TNumseg n_seg = sparse_align.GetNumseg();
00302         for( TNumseg i = 0;  i < n_seg;  i++ )  {
00303             int from_1 = first ? starts_1[i] : starts_2[i];
00304             int from_2 = first ? starts_2[i] : starts_1[i];
00305             int len = lens[i];
00306             bool direct = strands  &&  ((*strands)[i] == eNa_strand_minus);
00307 
00308             coll.insert(SAlignTools::TAlignRange(from_1, from_2, len, direct));
00309         }
00310     }
00311 }
00312 
00313 
00314 /// Reverse Converter
00315 /// Converts Align Collection into a CSparse_align
00316 CRef<CSparse_align> CreateSparseAlign(const CSeq_id& id_1,
00317                                       const CSeq_id& id_2,
00318                                       const SAlignTools::TAlignColl& coll)
00319 {
00320     CRef<CSparse_align> align(new CSparse_align());
00321 
00322     CRef<CSeq_id> rid_1(new CSeq_id());
00323     rid_1->Assign(id_1);
00324     align->SetFirst_id(*rid_1);
00325 
00326     CRef<CSeq_id> rid_2(new CSeq_id());
00327     rid_2->Assign(id_2);
00328     align->SetSecond_id(*rid_2);
00329 
00330     // initilize containers
00331     typedef CSparse_align::TNumseg TNumseg;
00332     TNumseg n_seg = (TNumseg)coll.size();
00333     align->SetNumseg(n_seg);
00334 
00335     CSparse_align::TFirst_starts& starts_1 = align->SetFirst_starts();
00336     starts_1.resize(n_seg);
00337     CSparse_align::TFirst_starts& starts_2 = align->SetSecond_starts();
00338     starts_2.resize(n_seg);
00339     CSparse_align::TLens& lens = align->SetLens();
00340     lens.resize(n_seg);
00341 
00342     CSparse_align::TSecond_strands* strands = NULL;
00343     if(coll.GetFlags()  &  SAlignTools::TAlignColl::fReversed) {
00344         // there are reversed segments in the collection - need to fill "Strands"
00345         strands = &align->SetSecond_strands();
00346         strands->resize(n_seg);
00347     }
00348 
00349     // move data to the containers
00350     TNumseg i = 0;
00351     ITERATE(SAlignTools::TAlignColl, it, coll)   {
00352         const SAlignTools::TAlignRange& r = *it;
00353 
00354         starts_1[i] = r.GetFirstFrom();
00355         starts_2[i] = r.GetSecondFrom();
00356         lens[i] = r.GetLength();
00357         if(strands)  {
00358             (*strands)[i] = r.IsDirect() ? eNa_strand_plus : eNa_strand_minus;
00359         }
00360         i++;
00361     }
00362 
00363     return align;
00364 }
00365 
00366 
00367 END_NCBI_SCOPE
Modified on Wed May 23 13:23:09 2012 by modify_doxy.py rev. 337098