NCBI C++ ToolKit
transform_align.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /*  $Id: transform_align.cpp 63536 2014-07-09 12:57:20Z mozese2 $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Authors:  Vyacheslav Chetvernin
00027  *
00028  * File Description: Alignment transformations
00029  *
00030  */
00031 #include <ncbi_pch.hpp>
00032 #include <algo/sequence/gene_model.hpp>
00033 #include <objects/seqalign/seqalign__.hpp>
00034 #include <objects/seqloc/Na_strand.hpp>
00035 #include <objects/general/User_object.hpp>
00036 #include <objects/general/Object_id.hpp>
00037 
00038 #include <objmgr/bioseq_handle.hpp>
00039 #include <objmgr/scope.hpp>
00040 #include <objmgr/feat_ci.hpp>
00041 #include <objmgr/util/sequence.hpp>
00042 
00043 #include <objtools/alnmgr/score_builder_base.hpp>
00044 
00045 #include "feature_generator.hpp"
00046 
00047 BEGIN_NCBI_SCOPE
00048 
00049 USING_SCOPE(objects);
00050 
00051 
00052 namespace {
00053 
00054 pair <ENa_strand, ENa_strand> GetSplicedStrands(const CSpliced_seg& spliced_seg)
00055 {
00056     ENa_strand product_strand =
00057         spliced_seg.IsSetProduct_strand() ?
00058         spliced_seg.GetProduct_strand() :
00059         (spliced_seg.GetExons().front()->IsSetProduct_strand() ?
00060          spliced_seg.GetExons().front()->GetProduct_strand() :
00061          eNa_strand_unknown);
00062     ENa_strand genomic_strand =
00063         spliced_seg.IsSetGenomic_strand() ?
00064         spliced_seg.GetGenomic_strand() :
00065         (spliced_seg.GetExons().front()->IsSetGenomic_strand()?
00066          spliced_seg.GetExons().front()->GetGenomic_strand():
00067          eNa_strand_unknown);
00068 
00069     return make_pair(product_strand, genomic_strand);
00070 }
00071 
00072 void SetProtpos(CProduct_pos &pos, int value)
00073 {
00074     pos.SetProtpos().SetAmin(value/3);
00075     pos.SetProtpos().SetFrame((value % 3) +1);
00076 }
00077 
00078 }
00079 
00080 
00081 void CFeatureGenerator::SImplementation::GetExonStructure(const CSpliced_seg& spliced_seg, vector<SExon>& exons, CScope* scope)
00082 {
00083     pair <ENa_strand, ENa_strand> strands = GetSplicedStrands(spliced_seg);
00084     ENa_strand product_strand = strands.first;
00085     ENa_strand genomic_strand = strands.second;
00086 
00087     exons.resize(spliced_seg.GetExons().size());
00088     int i = 0;
00089     TSignedSeqPos prev_genomic_pos = 0;
00090     TSignedSeqPos offset = 0;
00091     ITERATE(CSpliced_seg::TExons, it, spliced_seg.GetExons()) {
00092         const CSpliced_exon& exon = **it;
00093         SExon& exon_struct = exons[i++];
00094 
00095         const CProduct_pos& prod_from = exon.GetProduct_start();
00096         const CProduct_pos& prod_to = exon.GetProduct_end();
00097 
00098         exon_struct.prod_from = prod_from.AsSeqPos();
00099         exon_struct.prod_to = prod_to.AsSeqPos();
00100         if (product_strand == eNa_strand_minus) {
00101             swap(exon_struct.prod_from, exon_struct.prod_to);
00102             exon_struct.prod_from = -exon_struct.prod_from;
00103             exon_struct.prod_to = -exon_struct.prod_to;
00104         }
00105 
00106         exon_struct.genomic_from = exon.GetGenomic_start();
00107         exon_struct.genomic_to = exon.GetGenomic_end();
00108 
00109         bool cross_the_origin = i > 1 && (
00110             genomic_strand != eNa_strand_minus
00111             ? (exon_struct.genomic_from < prev_genomic_pos)
00112             : (exon_struct.genomic_from > prev_genomic_pos));
00113 
00114         if (cross_the_origin && scope) {
00115             offset = scope->GetSequenceLength(spliced_seg.GetGenomic_id());
00116         }
00117 
00118         prev_genomic_pos = exon_struct.genomic_from;
00119         
00120         if (genomic_strand == eNa_strand_minus) {
00121             swap(exon_struct.genomic_from, exon_struct.genomic_to);
00122             exon_struct.genomic_from = -exon_struct.genomic_from;
00123             exon_struct.genomic_to = -exon_struct.genomic_to;
00124         }
00125 
00126         if (offset) {
00127             exon_struct.genomic_from += offset;
00128             exon_struct.genomic_to += offset;
00129         }
00130 
00131     }
00132 
00133     _ASSERT( exons.size() == spliced_seg.GetExons().size() );
00134 }
00135 
00136 
00137 void CFeatureGenerator::SImplementation::StitchSmallHoles(CSeq_align& align)
00138 {
00139     CSpliced_seg& spliced_seg = align.SetSegs().SetSpliced();
00140 
00141     if (!spliced_seg.CanGetExons() || spliced_seg.GetExons().size() < 2)
00142         return;
00143 
00144     vector<SExon> exons;
00145     GetExonStructure(spliced_seg, exons, m_scope);
00146 
00147     bool is_protein = (spliced_seg.GetProduct_type()==CSpliced_seg::eProduct_type_protein);
00148 
00149     pair <ENa_strand, ENa_strand> strands = GetSplicedStrands(spliced_seg);
00150     ENa_strand product_strand = strands.first;
00151     ENa_strand genomic_strand = strands.second;
00152 
00153     int product_min_pos;
00154     int product_max_pos;
00155     if (product_strand != eNa_strand_minus) {
00156         product_min_pos = 0;
00157         if (spliced_seg.IsSetPoly_a()) {
00158             product_max_pos = spliced_seg.GetPoly_a()-1;
00159         } else if (spliced_seg.IsSetProduct_length()) {
00160             product_max_pos = spliced_seg.GetProduct_length()-1;
00161             if (is_protein)
00162                 product_max_pos *= 3+2;
00163         } else {
00164             product_max_pos = exons.back().prod_to;
00165         }
00166     } else {
00167         if (spliced_seg.IsSetProduct_length()) {
00168             product_min_pos = -int(spliced_seg.GetProduct_length())+1;
00169             if (is_protein)
00170                 product_min_pos = product_min_pos*3-2;
00171         } else {
00172             product_min_pos = exons[0].prod_from;
00173         }
00174         if (spliced_seg.IsSetPoly_a()) {
00175             product_max_pos = -int(spliced_seg.GetPoly_a())+1;
00176         } else {
00177             product_max_pos = 0;
00178         }
00179     }
00180 
00181     CSpliced_seg::TExons::iterator it = spliced_seg.SetExons().begin();
00182     CRef<CSpliced_exon> prev_exon = *it;
00183     size_t i = 1;
00184     for (++it; it != spliced_seg.SetExons().end();  ++i, prev_exon = *it++) {
00185         CSpliced_exon& exon = **it;
00186 
00187         bool donor_set = prev_exon->IsSetDonor_after_exon();
00188         bool acceptor_set = exon.IsSetAcceptor_before_exon();
00189 
00190         if(donor_set && acceptor_set && exons[i-1].prod_to + 1 == exons[i].prod_from) {
00191             continue;
00192         }
00193 
00194         _ASSERT( exons[i].prod_from > exons[i-1].prod_to );
00195         int prod_hole_len = exons[i].prod_from - exons[i-1].prod_to -1;
00196         _ASSERT( exons[i].genomic_from > exons[i-1].genomic_to );
00197         int genomic_hole_len = exons[i].genomic_from - exons[i-1].genomic_to -1;
00198 
00199         if (prod_hole_len >= (int)m_min_intron || genomic_hole_len >= (int)m_min_intron)
00200             continue;
00201 
00202         if (!prev_exon->IsSetParts() || prev_exon->GetParts().empty()) {
00203             CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00204             part->SetMatch(exons[i-1].prod_to-exons[i-1].prod_from+1);
00205             prev_exon->SetParts().push_back(part);
00206         }
00207         if (!exon.IsSetParts() || exon.GetParts().empty()) {
00208             CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00209             part->SetMatch(exons[i].prod_to-exons[i].prod_from+1);
00210             exon.SetParts().push_back(part);
00211         }
00212 
00213         int max_hole_len = max(prod_hole_len, genomic_hole_len);
00214         int min_hole_len = min(prod_hole_len, genomic_hole_len);
00215         int left_mismatch_len = 0;
00216         int right_mismatch_len = min_hole_len;
00217         if (prod_hole_len != genomic_hole_len) {
00218             // does not matter for transcripts, but for proteins ensures insersions at codon boundary
00219             int bases_needed_to_complete_codon = 2 - (exons[i-1].prod_to % 3);
00220         
00221             if (right_mismatch_len >= bases_needed_to_complete_codon) {
00222                 left_mismatch_len = bases_needed_to_complete_codon + ((right_mismatch_len-bases_needed_to_complete_codon)/2/3)*3;
00223                 right_mismatch_len -= left_mismatch_len;
00224             }
00225         }
00226 
00227         bool no_acceptor_before = i > 1 && !prev_exon->IsSetAcceptor_before_exon();
00228         bool no_donor_after = i < exons.size()-1 && !exon.IsSetDonor_after_exon();
00229 
00230 
00231         bool cross_the_origin =
00232             genomic_strand != eNa_strand_minus
00233             ? (prev_exon->GetGenomic_start() > exon.GetGenomic_start())
00234             : (prev_exon->GetGenomic_start() < exon.GetGenomic_start());
00235 
00236         if (cross_the_origin) {
00237             int genomic_size = m_scope->GetSequenceLength(spliced_seg.GetGenomic_id());
00238 
00239             prev_exon->SetPartial(product_min_pos < exons[i-1].prod_from  &&
00240                                   no_acceptor_before);
00241 
00242             exon.SetPartial(exons[i].prod_to < product_max_pos &&
00243                             no_donor_after);
00244 
00245             if (genomic_strand != eNa_strand_minus) {
00246                 prev_exon->SetGenomic_end(genomic_size-1);
00247                 exon.SetGenomic_start(0);
00248             } else {
00249                 prev_exon->SetGenomic_start(0);
00250                 exon.SetGenomic_end(genomic_size-1);
00251             }
00252 
00253             int origin = genomic_strand != eNa_strand_minus ? genomic_size : 1;
00254             int to_origin = origin - exons[i-1].genomic_to -1;
00255             if (prod_hole_len == genomic_hole_len) {
00256                 left_mismatch_len = to_origin;
00257                 right_mismatch_len -= left_mismatch_len;
00258             }
00259 
00260             if (left_mismatch_len > 0 && to_origin > 0) {
00261                 int mismatch_len = min(left_mismatch_len, to_origin);
00262                 CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00263                 part->SetMismatch(mismatch_len);
00264                 prev_exon->SetParts().push_back(part);
00265                 prod_hole_len -= mismatch_len;
00266                 genomic_hole_len -= mismatch_len;
00267                 to_origin -= mismatch_len;
00268                 exons[i-1].genomic_to += mismatch_len;
00269                 exons[i-1].prod_to += mismatch_len;
00270                 left_mismatch_len -= mismatch_len;
00271             }
00272 
00273             if (to_origin > 0) {
00274                 _ASSERT(left_mismatch_len == 0);
00275                 _ASSERT(prod_hole_len != genomic_hole_len);
00276                 CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00277                 if (prod_hole_len < genomic_hole_len) {
00278                     int genomic_ins = min(genomic_hole_len-prod_hole_len, to_origin);
00279                     part->SetGenomic_ins(genomic_ins);
00280                     genomic_hole_len -= genomic_ins;
00281                     to_origin -= genomic_ins;
00282                     exons[i-1].genomic_to += genomic_ins;
00283                 } else {
00284                     part->SetProduct_ins(prod_hole_len-genomic_hole_len);
00285                     exons[i-1].prod_to += prod_hole_len-genomic_hole_len;
00286                     prod_hole_len = genomic_hole_len;
00287                 }
00288                 prev_exon->SetParts().push_back(part);
00289             }
00290             if (to_origin > 0) {
00291                 _ASSERT(prod_hole_len == genomic_hole_len);
00292                 _ASSERT(right_mismatch_len >= to_origin);
00293                 int mismatch_len = to_origin;
00294                 CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00295                 part->SetMismatch(mismatch_len);
00296                 prev_exon->SetParts().push_back(part);
00297                 prod_hole_len -= mismatch_len;
00298                 genomic_hole_len -= mismatch_len;
00299                 to_origin = 0;
00300                 exons[i-1].genomic_to += mismatch_len;
00301                 exons[i-1].prod_to += mismatch_len;
00302                 right_mismatch_len -= mismatch_len;
00303             }
00304 
00305             _ASSERT(to_origin == 0);
00306             _ASSERT(exons[i-1].genomic_to == origin-1);
00307 
00308             exons[i].prod_from = exons[i-1].prod_to+1;
00309             exons[i].genomic_from = exons[i-1].genomic_to+1;
00310 
00311             if (is_protein) {
00312                 prev_exon->SetProduct_end().SetProtpos().SetAmin() = exons[i-1].prod_to/3;
00313                 prev_exon->SetProduct_end().SetProtpos().SetFrame() = (exons[i-1].prod_to %3) +1;
00314                 exon.SetProduct_start().SetProtpos().SetAmin() = exons[i].prod_from/3;
00315                 exon.SetProduct_start().SetProtpos().SetFrame() = (exons[i].prod_from %3) +1;
00316             } else if (product_strand != eNa_strand_minus) {
00317                 prev_exon->SetProduct_end().SetNucpos( exons[i-1].prod_to );
00318                 exon.SetProduct_start().SetNucpos( exons[i].prod_from );
00319             } else {
00320                 prev_exon->SetProduct_start().SetNucpos( -exons[i-1].prod_to );
00321                 exon.SetProduct_end().SetNucpos( -exons[i].prod_from );
00322             }
00323 
00324             list <CRef< CSpliced_exon_chunk > >::iterator insertion_point = exon.SetParts().begin();
00325 
00326             if (left_mismatch_len > 0) {
00327                 CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00328                 part->SetMismatch(left_mismatch_len);
00329                 insertion_point = exon.SetParts().insert(insertion_point, part);
00330                 ++insertion_point;
00331             }
00332             if (prod_hole_len != genomic_hole_len) {
00333                 CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00334                 if (prod_hole_len < genomic_hole_len) {
00335                     part->SetGenomic_ins(genomic_hole_len - prod_hole_len);
00336                 } else {
00337                     part->SetProduct_ins(prod_hole_len - genomic_hole_len);
00338                 }
00339                 insertion_point = exon.SetParts().insert(insertion_point, part);
00340                 ++insertion_point;
00341             }
00342             if (right_mismatch_len > 0) {
00343                 CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00344                 part->SetMismatch(right_mismatch_len);
00345                 exon.SetParts().insert(insertion_point, part);
00346 
00347             }
00348 
00349         } else {
00350 
00351             if (is_protein || product_strand != eNa_strand_minus) {
00352                 prev_exon->SetProduct_end().Assign( exon.GetProduct_end() );
00353             } else {
00354                 prev_exon->SetProduct_start().Assign( exon.GetProduct_start() );
00355             }
00356         
00357             if (genomic_strand != eNa_strand_minus) {
00358                 prev_exon->SetGenomic_end() = exon.GetGenomic_end();
00359             } else {
00360                 prev_exon->SetGenomic_start() = exon.GetGenomic_start();
00361             }
00362 
00363             if (left_mismatch_len > 0) {
00364                 CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00365                 part->SetMismatch(left_mismatch_len);
00366                 prev_exon->SetParts().push_back(part);
00367             }
00368             if (prod_hole_len != genomic_hole_len) {
00369                CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00370                 if (prod_hole_len < genomic_hole_len) {
00371                     part->SetGenomic_ins(max_hole_len - min_hole_len);
00372                 } else {
00373                     part->SetProduct_ins(max_hole_len - min_hole_len);
00374                 }
00375                 prev_exon->SetParts().push_back(part);
00376             }
00377             if (right_mismatch_len > 0) {
00378                 CRef< CSpliced_exon_chunk > part(new CSpliced_exon_chunk);
00379                 part->SetMismatch(right_mismatch_len);
00380                 prev_exon->SetParts().push_back(part);
00381 
00382             }
00383             prev_exon->SetParts().splice(prev_exon->SetParts().end(), exon.SetParts());
00384 
00385             if (exon.IsSetDonor_after_exon()) {
00386                 prev_exon->SetDonor_after_exon().Assign( exon.GetDonor_after_exon() );
00387             } else {
00388                 prev_exon->ResetDonor_after_exon();
00389             }
00390 
00391             exons[i].prod_from = exons[i-1].prod_from;
00392             exons[i].genomic_from = exons[i-1].genomic_from;
00393 
00394             prev_exon->SetPartial(
00395                                   (product_min_pos < exons[i-1].prod_from  && no_acceptor_before) ||
00396                                   (exons[i].prod_to < product_max_pos  && no_donor_after));
00397 
00398             if (exon.IsSetExt()) {
00399                 prev_exon->SetExt().splice(prev_exon->SetExt().end(), exon.SetExt());
00400             }
00401 
00402             CSpliced_seg::TExons::iterator save_it = it;
00403             --save_it;
00404             spliced_seg.SetExons().erase(it);
00405             it = save_it;
00406         }
00407     }
00408 }
00409 
00410 vector<CFeatureGenerator::SImplementation::SExon> CFeatureGenerator::SImplementation::
00411 GetExons(const CSeq_align &align)
00412 {
00413     vector<SExon> exons;
00414     GetExonStructure(align.GetSegs().GetSpliced(), exons, NULL);
00415     return exons;
00416 }
00417 
00418 CSeq_align::EScoreType s_ScoresToRecalculate[] =
00419 { CSeq_align::eScore_IdentityCount,
00420   CSeq_align::eScore_MismatchCount,
00421   CSeq_align::eScore_GapCount,
00422   CSeq_align::eScore_PercentIdentity_Gapped, 
00423   CSeq_align::eScore_PercentIdentity_Ungapped, 
00424   CSeq_align::eScore_PercentCoverage,
00425   CSeq_align::eScore_HighQualityPercentCoverage,
00426   (CSeq_align::EScoreType)0
00427 };
00428 
00429 void CFeatureGenerator::SImplementation::
00430 ClearScores(CSeq_align &align)
00431 {
00432     NON_CONST_ITERATE (CSpliced_seg::TExons, exon_it,
00433                        align.SetSegs().SetSpliced().SetExons())
00434     {
00435         (*exon_it)->ResetScores();
00436     }
00437     if (align.IsSetScore()) {
00438         CScoreBuilderBase score_builder;
00439         for (CSeq_align::EScoreType *score = s_ScoresToRecalculate;
00440              *score; ++score)
00441         {
00442             align.ResetNamedScore(*score);
00443         }
00444         align.ResetNamedScore("weighted_identity");
00445 
00446         if (align.SetScore().empty()) {
00447             align.ResetScore();
00448         }
00449     }
00450 }
00451 
00452 
00453 void CFeatureGenerator::SImplementation::
00454 RecalculateScores(CSeq_align &align)
00455 {
00456     NON_CONST_ITERATE (CSpliced_seg::TExons, exon_it,
00457                        align.SetSegs().SetSpliced().SetExons())
00458     {
00459     RecalculateExonIdty(**exon_it);
00460     }
00461 
00462     if (align.IsSetScore()) {
00463         CScoreBuilderBase score_builder;
00464         for (CSeq_align::EScoreType *score = s_ScoresToRecalculate;
00465              *score; ++score)
00466         {
00467             int sink;
00468             if (align.GetNamedScore(*score, sink)) {
00469                 align.ResetNamedScore(*score);
00470                 score_builder.AddScore(*m_scope, align, *score);
00471             }
00472         }
00473         if (align.GetSegs().GetSpliced().GetProduct_type() ==
00474             CSpliced_seg::eProduct_type_transcript)
00475         {
00476             score_builder.AddSplignScores(align);
00477         }
00478         align.ResetNamedScore("weighted_identity");
00479     }
00480 }
00481 
00482 void CFeatureGenerator::SImplementation::
00483 RecalculateExonIdty(CSpliced_exon &exon)
00484 {
00485     if (!exon.IsSetScores())
00486         return;
00487 
00488     Int8 idty = -1;
00489     if (exon.IsSetParts()) {
00490         int matches = 0;
00491         int total = 0;
00492         ITERATE (CSpliced_exon::TParts, part_it, exon.GetParts()) {
00493             switch ((*part_it)->Which()) {
00494             case CSpliced_exon_chunk::e_Match:
00495                matches += (*part_it)->GetMatch();
00496                total += (*part_it)->GetMatch();
00497                break;
00498 
00499             case CSpliced_exon_chunk::e_Mismatch:
00500                total += (*part_it)->GetMismatch();
00501                break;
00502 
00503             case CSpliced_exon_chunk::e_Product_ins:
00504                total += (*part_it)->GetProduct_ins();
00505                break;
00506 
00507             case CSpliced_exon_chunk::e_Genomic_ins:
00508                total += (*part_it)->GetGenomic_ins();
00509                break;
00510 
00511             default:
00512                 matches = INT_MIN; // to ensure negative identity
00513                 total += 1;        // to prevent division by zero
00514                 break;
00515             }
00516         }
00517         idty = matches * NCBI_CONST_INT8(10000000000) / total;
00518     }
00519 
00520     CScore_set::Tdata& exon_scores = exon.SetScores().Set();
00521     ERASE_ITERATE (CScore_set::Tdata, score_it, exon_scores) {
00522         if (idty >= 0 && (*score_it)->IsSetId() && (*score_it)->GetId().IsStr() &&
00523             (*score_it)->GetId().GetStr() == "idty") {
00524             (*score_it)->SetValue().SetReal(idty / 10000000000.);
00525         } else {
00526             exon_scores.erase(score_it);
00527         }
00528     }
00529 }
00530 
00531 void CFeatureGenerator::SImplementation::TrimHolesToCodons(CSeq_align& align)
00532 {
00533     CSpliced_seg& spliced_seg = align.SetSegs().SetSpliced();
00534 
00535     if (!spliced_seg.CanGetExons())
00536         return;
00537 
00538     bool is_protein = (spliced_seg.GetProduct_type()==CSpliced_seg::eProduct_type_protein);
00539 
00540     pair <ENa_strand, ENa_strand> strands = GetSplicedStrands(spliced_seg);
00541     ENa_strand product_strand = strands.first;
00542     ENa_strand genomic_strand = strands.second;
00543 
00544     TSignedSeqRange cds;
00545     if (is_protein) {
00546         cds = TSignedSeqRange(0, spliced_seg.GetProduct_length()*3 - 1);
00547     } else {
00548         if (!spliced_seg.CanGetProduct_id())
00549             return;
00550         cds = GetCds(spliced_seg.GetProduct_id());
00551         if (cds.Empty())
00552             return;
00553         if (product_strand == eNa_strand_minus) {
00554             NCBI_THROW(CException, eUnknown,
00555                        "TrimHolesToCodons(): "
00556                        "Reversed mRNA with CDS");
00557         }
00558     }
00559 
00560     vector<SExon> exons;
00561     GetExonStructure(spliced_seg, exons, m_scope);
00562 
00563     int frame_offset = (exons.back().prod_to/3+1)*3+cds.GetFrom(); // to make modulo operands always positive
00564 
00565     vector<SExon>::iterator right_exon_it = exons.begin();
00566     CSpliced_seg::TExons::iterator right_spl_exon_it = spliced_seg.SetExons().begin();
00567 
00568     for(;;++right_exon_it, ++right_spl_exon_it) {
00569 
00570         vector<SExon>::reverse_iterator left_exon_it(right_exon_it); 
00571         CSpliced_seg::TExons::reverse_iterator left_spl_exon_it(right_spl_exon_it);
00572 
00573         if (right_exon_it != exons.begin() && right_exon_it != exons.end()) {
00574             bool donor_set = left_spl_exon_it != spliced_seg.SetExons().rend() && (*left_spl_exon_it)->IsSetDonor_after_exon();
00575             bool acceptor_set = right_spl_exon_it != spliced_seg.SetExons().end() && (*right_spl_exon_it)->IsSetAcceptor_before_exon();
00576 
00577             if(((donor_set && acceptor_set) || left_exon_it->genomic_to + 1 == right_exon_it->genomic_from) && left_exon_it->prod_to + 1 == right_exon_it->prod_from) {
00578                 continue;
00579             }
00580         }
00581 
00582         if (right_exon_it != exons.begin() && (right_exon_it != exons.end() || (m_flags & fTrimEnds)) &&
00583             cds.GetFrom() < left_exon_it->prod_to && left_exon_it->prod_to < cds.GetTo()
00584             )
00585             TrimLeftExon((left_exon_it->prod_to - cds.GetFrom() + 1) % 3, eTrimProduct,
00586                          exons.rend(), left_exon_it, left_spl_exon_it,
00587                          product_strand, genomic_strand);
00588 
00589         if (right_exon_it != exons.end() && (right_exon_it != exons.begin() || (m_flags & fTrimEnds)) &&
00590             cds.GetFrom() < right_exon_it->prod_from && right_exon_it->prod_from < cds.GetTo()
00591             )
00592             TrimRightExon((frame_offset-right_exon_it->prod_from) % 3, eTrimProduct,
00593                           right_exon_it, exons.end(), right_spl_exon_it,
00594                           product_strand, genomic_strand);
00595         
00596         if (left_exon_it.base() != right_exon_it) {
00597             right_exon_it = exons.erase(left_exon_it.base(), right_exon_it);
00598             right_spl_exon_it = spliced_seg.SetExons().erase(left_spl_exon_it.base(), right_spl_exon_it);
00599         }
00600 
00601         if (right_exon_it == exons.end())
00602             break;
00603     }
00604     _ASSERT(right_exon_it == exons.end() && right_spl_exon_it == spliced_seg.SetExons().end());
00605 }
00606 
00607 void CFeatureGenerator::SImplementation::MaximizeTranslation(CSeq_align& align)
00608 {
00609     CSpliced_seg& spliced_seg = align.SetSegs().SetSpliced();
00610     bool is_protein_align =
00611         spliced_seg.GetProduct_type() == CSpliced_seg::eProduct_type_protein;
00612 
00613     int aa_offset = 0;
00614 
00615     NON_CONST_ITERATE (CSpliced_seg::TExons, exon_it, spliced_seg.SetExons()) {
00616         CSpliced_exon& exon = **exon_it;
00617         if (aa_offset) {
00618             if (is_protein_align)
00619                 exon.SetProduct_start().SetProtpos().SetAmin() += aa_offset;
00620             else
00621                 exon.SetProduct_start().SetNucpos() += aa_offset*3;
00622         }
00623         if (exon.IsSetParts()) {
00624             ERASE_ITERATE (CSpliced_exon::TParts, part_it, exon.SetParts()) {
00625                 CSpliced_exon_chunk& chunk = **part_it;
00626                 switch ((*part_it)->Which()) {
00627                 case CSpliced_exon_chunk::e_Genomic_ins: {
00628                     int len = chunk.GetGenomic_ins();
00629                     if (len % 3 == 0) {
00630                         chunk.SetDiag(len);
00631                     } else if (len > 3) {
00632                         CRef<CSpliced_exon_chunk> new_chunk(new CSpliced_exon_chunk);
00633                         new_chunk->SetGenomic_ins(len % 3);
00634                         exon.SetParts().insert(part_it, new_chunk);
00635                         chunk.SetDiag((len/3)*3);
00636                     }
00637                     aa_offset += len/3;
00638                 }
00639                     break;
00640                 case CSpliced_exon_chunk::e_Product_ins: {
00641                     int len = chunk.GetProduct_ins();
00642                     if (len % 3 == 0) {
00643                         exon.SetParts().erase(part_it);
00644                     } else {
00645                         chunk.SetProduct_ins(len % 3);
00646                     }
00647                     aa_offset -= len/3;
00648                 }
00649                     break;
00650                 default:
00651                     break;
00652                 }
00653             }
00654         }
00655         if (aa_offset) {
00656             if (is_protein_align)
00657                 exon.SetProduct_end().SetProtpos().SetAmin() += aa_offset;
00658             else
00659                 exon.SetProduct_end().SetNucpos() += aa_offset*3;
00660         }
00661     }
00662     if (aa_offset) {
00663         spliced_seg.SetProduct_length() += is_protein_align ? aa_offset : aa_offset*3;
00664     }
00665 }
00666 
00667 CConstRef<CSeq_align> CFeatureGenerator::AdjustAlignment(const CSeq_align& align_in, TSeqRange range, EProductPositionsMode mode)
00668 {
00669     return m_impl->AdjustAlignment(align_in, range, mode);
00670 }
00671 
00672 CConstRef<CSeq_align> CFeatureGenerator::SImplementation::AdjustAlignment(const CSeq_align& align_in, TSeqRange range, EProductPositionsMode mode)
00673 {
00674     if (!align_in.CanGetSegs() || !align_in.GetSegs().IsSpliced())
00675         return CConstRef<CSeq_align>(&align_in);
00676 
00677     CRef<CSeq_align> align(new CSeq_align);
00678     align->Assign(align_in);
00679 
00680     vector<SExon> orig_exons = GetExons(*align);
00681 
00682     CSpliced_seg& spliced_seg = align->SetSegs().SetSpliced();
00683 
00684     pair <ENa_strand, ENa_strand> strands = GetSplicedStrands(spliced_seg);
00685     ENa_strand product_strand = strands.first;
00686     ENa_strand genomic_strand = strands.second;
00687 
00688     if (product_strand == eNa_strand_minus) {
00689         NCBI_THROW(CException, eUnknown,
00690                    "AdjustAlignment(): "
00691                    "product minus strand not supported");
00692         
00693     }
00694 
00695     bool plus_strand = !(genomic_strand == eNa_strand_minus);
00696 
00697     TSeqRange align_range;
00698     if (plus_strand) {
00699         align_range = TSeqRange(spliced_seg.GetExons().front()->GetGenomic_start(),
00700                                 spliced_seg.GetExons().back()->GetGenomic_end());
00701     } else {
00702         align_range = TSeqRange(spliced_seg.GetExons().back()->GetGenomic_start(),
00703                                 spliced_seg.GetExons().front()->GetGenomic_end());
00704     }
00705     bool cross_the_origin = range.GetFrom() > range.GetTo() || align_range.GetFrom() > align_range.GetTo();
00706     TSeqPos genomic_size = 0;
00707     if (cross_the_origin) {
00708         genomic_size = m_scope->GetSequenceLength(spliced_seg.GetGenomic_id());
00709 
00710 
00711         if (range.GetFrom() > range.GetTo()) {
00712             range.SetTo(range.GetTo() + genomic_size);
00713         }
00714         if (align_range.GetFrom() > align_range.GetTo()) {
00715             align_range.SetTo(align_range.GetTo() + genomic_size);
00716         }
00717 
00718         if (range.GetTo() < align_range.GetFrom()) {
00719             range.SetFrom(range.GetFrom() + genomic_size);
00720             range.SetTo(range.GetTo() + genomic_size);
00721         }
00722         if (align_range.GetTo() < range.GetFrom()) {
00723             align_range.SetFrom(align_range.GetFrom() + genomic_size);
00724             align_range.SetTo(align_range.GetTo() + genomic_size);
00725         }
00726 
00727         TSeqPos outside_point = (min(range.GetFrom(), align_range.GetFrom())+max(range.GetTo(), align_range.GetTo())-genomic_size)/2;
00728         NON_CONST_ITERATE(CSpliced_seg::TExons, exon_it, spliced_seg.SetExons()) {
00729             CSpliced_exon& exon = **exon_it;
00730             if (exon.GetGenomic_start() < outside_point)
00731                 exon.SetGenomic_start() += genomic_size;
00732             if (exon.GetGenomic_end() < outside_point)
00733                 exon.SetGenomic_end() += genomic_size;
00734         }
00735     }
00736 
00737     _ASSERT(range.GetFrom() <= range.GetTo());
00738     _ASSERT(align_range.GetFrom() <= align_range.GetTo());
00739     _ASSERT(!(range.GetTo() < align_range.GetFrom()));
00740     _ASSERT(!(align_range.GetTo() < range.GetFrom()));
00741 
00742     vector<SExon> exons;
00743     GetExonStructure(spliced_seg, exons, m_scope);
00744 
00745     bool is_protein_align =
00746         spliced_seg.GetProduct_type() == CSpliced_seg::eProduct_type_protein;
00747 
00748     vector<SExon>::iterator right_exon_it = exons.begin();
00749     CSpliced_seg::TExons::iterator right_spl_exon_it = spliced_seg.SetExons().begin();
00750 
00751     int range_left = plus_strand ? int(range.GetFrom()) : -int(range.GetTo());
00752     int range_right = plus_strand ? int(range.GetTo()) : -int(range.GetFrom());
00753 
00754     for(;;++right_exon_it, ++right_spl_exon_it) {
00755 
00756         vector<SExon>::reverse_iterator left_exon_it(right_exon_it); 
00757         CSpliced_seg::TExons::reverse_iterator left_spl_exon_it(right_spl_exon_it);
00758 
00759         if (right_exon_it == exons.end() &&
00760             left_exon_it->genomic_to > range_right
00761             )
00762             CFeatureGenerator::SImplementation::TrimLeftExon(left_exon_it->genomic_to - range_right, eTrimGenomic,
00763                          exons.rend(), left_exon_it, left_spl_exon_it,
00764                          product_strand, genomic_strand);
00765 
00766         if (right_exon_it == exons.begin() &&
00767             right_exon_it->genomic_from < range_left
00768             )
00769             CFeatureGenerator::SImplementation::TrimRightExon(range_left - right_exon_it->genomic_from, eTrimGenomic,
00770                           right_exon_it, exons.end(), right_spl_exon_it,
00771                           product_strand, genomic_strand);
00772         
00773         if (left_exon_it.base() != right_exon_it) {
00774             right_exon_it = exons.erase(left_exon_it.base(), right_exon_it);
00775             right_spl_exon_it = spliced_seg.SetExons().erase(left_spl_exon_it.base(), right_spl_exon_it);
00776         }
00777 
00778         if (right_exon_it == exons.end())
00779             break;
00780     }
00781 
00782     CSpliced_exon& first_exon = *spliced_seg.SetExons().front();
00783     CSpliced_exon& last_exon = *spliced_seg.SetExons().back();
00784 
00785     int first_exon_extension = 0;
00786     int last_exon_extension = 0;
00787 
00788     if (plus_strand) {
00789 
00790         first_exon_extension =
00791             first_exon.GetGenomic_start()
00792             - ((range.GetFrom() < genomic_size && genomic_size <= first_exon.GetGenomic_start())
00793                ? genomic_size
00794                : range.GetFrom());
00795 
00796         if (first_exon_extension > 0) {
00797             CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
00798             chunk->SetDiag(first_exon.IsSetParts() ? first_exon_extension : (first_exon.GetGenomic_end() - range.GetFrom() + 1));
00799             first_exon.SetParts().insert(first_exon.SetParts().begin(), chunk);
00800             first_exon.SetGenomic_start() -= first_exon_extension;
00801         }
00802 
00803         last_exon_extension =
00804             ((last_exon.GetGenomic_end() <= genomic_size-1 && genomic_size-1 < range.GetTo())
00805              ? genomic_size-1
00806              : range.GetTo())
00807             - last_exon.GetGenomic_end();
00808 
00809         if (last_exon_extension > 0) {
00810             CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
00811             chunk->SetDiag(last_exon.IsSetParts() ? last_exon_extension : (range.GetTo() - last_exon.GetGenomic_start() + 1));
00812             last_exon.SetParts().push_back(chunk);
00813             last_exon.SetGenomic_end() += last_exon_extension;
00814         }
00815     } else {
00816         last_exon_extension =
00817             last_exon.GetGenomic_start()
00818             - ((range.GetFrom() < genomic_size && genomic_size <= last_exon.GetGenomic_start())
00819                ? genomic_size
00820                : range.GetFrom());
00821 
00822         if (last_exon_extension > 0) {
00823             CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
00824             chunk->SetDiag(last_exon.IsSetParts() ? last_exon_extension : (last_exon.GetGenomic_end() - range.GetFrom() + 1));
00825             last_exon.SetParts().push_back(chunk);
00826             last_exon.SetGenomic_start() -= last_exon_extension;
00827         }
00828 
00829         first_exon_extension =
00830             ((first_exon.GetGenomic_end() <= genomic_size-1 && genomic_size-1 < range.GetTo())
00831              ? genomic_size-1
00832              : range.GetTo())
00833             - first_exon.GetGenomic_end();
00834         if (first_exon_extension > 0) {
00835             CRef<CSpliced_exon_chunk> chunk(new CSpliced_exon_chunk);
00836             chunk->SetDiag(first_exon.IsSetParts() ? first_exon_extension : (range.GetTo() - first_exon.GetGenomic_start() + 1));
00837             first_exon.SetParts().insert(first_exon.SetParts().begin(), chunk);
00838             first_exon.SetGenomic_end() += first_exon_extension;
00839         }
00840     }
00841 
00842     exons.front().prod_from -= first_exon_extension;
00843     exons.front().genomic_from -= first_exon_extension;
00844     exons.back().prod_to += last_exon_extension;
00845     exons.back().genomic_to += last_exon_extension;
00846 
00847 
00848     if (plus_strand) {
00849         first_exon_extension = first_exon.GetGenomic_start() - range.GetFrom();
00850 
00851         if (first_exon_extension > 0) {
00852             CRef<CSpliced_exon> exon(new CSpliced_exon);
00853             exon->SetGenomic_start() = range.GetFrom();
00854             exon->SetGenomic_end() = genomic_size-1;
00855             spliced_seg.SetExons().push_front(exon);
00856 
00857             SExon exon_struct;
00858             exon_struct.prod_from = exons.front().prod_from - first_exon_extension;
00859             exon_struct.prod_to = exons.front().prod_from - 1;
00860             exon_struct.genomic_from = exons.front().genomic_from - first_exon_extension;
00861             exon_struct.genomic_to = exons.front().genomic_from - 1;
00862 
00863             exons.insert(exons.begin(), exon_struct);
00864         }
00865 
00866         last_exon_extension = range.GetTo() - last_exon.GetGenomic_end();
00867 
00868         if (last_exon_extension > 0) {
00869             CRef<CSpliced_exon> exon(new CSpliced_exon);
00870             exon->SetGenomic_start() = 0;
00871             exon->SetGenomic_end() = last_exon_extension - 1;
00872             spliced_seg.SetExons().push_back(exon);
00873 
00874             SExon exon_struct;
00875             exon_struct.prod_from = exons.back().prod_to + 1;
00876             exon_struct.prod_to = exons.back().prod_to + last_exon_extension;
00877             exon_struct.genomic_from = exons.back().genomic_to +1;
00878             exon_struct.genomic_to = exons.back().genomic_to + last_exon_extension;
00879 
00880             exons.push_back(exon_struct);
00881         }
00882     } else {
00883         last_exon_extension = last_exon.GetGenomic_start() - range.GetFrom();
00884 
00885         if (last_exon_extension > 0) {
00886             CRef<CSpliced_exon> exon(new CSpliced_exon);
00887             exon->SetGenomic_start() = range.GetFrom();
00888             exon->SetGenomic_end() = genomic_size-1;
00889             spliced_seg.SetExons().push_back(exon);
00890 
00891             SExon exon_struct;
00892             exon_struct.prod_from = exons.back().prod_to + 1;
00893             exon_struct.prod_to = exons.back().prod_to + last_exon_extension;
00894             exon_struct.genomic_from = exons.back().genomic_to +1;
00895             exon_struct.genomic_to = exons.back().genomic_to + last_exon_extension;
00896 
00897             exons.push_back(exon_struct);
00898         }
00899 
00900         first_exon_extension = range.GetTo() - first_exon.GetGenomic_end();
00901 
00902         if (first_exon_extension > 0) {
00903             CRef<CSpliced_exon> exon(new CSpliced_exon);
00904             exon->SetGenomic_start() = 0;
00905             exon->SetGenomic_end() = first_exon_extension - 1;
00906             spliced_seg.SetExons().push_front(exon);
00907 
00908             SExon exon_struct;
00909             exon_struct.prod_from = exons.front().prod_from - first_exon_extension;
00910             exon_struct.prod_to = exons.front().prod_from - 1;
00911             exon_struct.genomic_from = exons.front().genomic_from - first_exon_extension;
00912             exon_struct.genomic_to = exons.front().genomic_from - 1;
00913 
00914             exons.insert(exons.begin(), exon_struct);
00915         }
00916     }
00917 
00918     if (range_left != exons.front().genomic_from || range_right != exons.back().genomic_to) {
00919         NCBI_THROW(CException, eUnknown,
00920                    "AdjustAlignment(): "
00921                    "result's ends do not match the range. This is a bug in AdjustAlignment implementation");
00922     }
00923 
00924     int offset = is_protein_align ? int(exons.front().prod_from/3)*3 : exons.front().prod_from;
00925     if (offset > exons.front().prod_from) // negative division rounds toward zero
00926         offset -= 3;
00927 
00928     if (mode == eTryToPreserveProductPositions && offset > 0) {
00929         offset = 0; // do not shift product position unnecessarily
00930     }
00931 
00932     vector<SExon>::iterator exon_struct_it = exons.begin();
00933 
00934     int putative_prod_length = 0;
00935     if (is_protein_align) {
00936         NON_CONST_ITERATE (CSpliced_seg::TExons, exon_it, spliced_seg.SetExons()) {
00937             CSpliced_exon& exon = **exon_it;
00938             SetProtpos(exon.SetProduct_start(), exon_struct_it->prod_from - offset);
00939             SetProtpos(exon.SetProduct_end(), exon_struct_it->prod_to - offset);
00940             ++exon_struct_it;
00941         }
00942         putative_prod_length = (exons.back().prod_to - offset + 3)/3;
00943     } else {
00944         NON_CONST_ITERATE (CSpliced_seg::TExons, exon_it, spliced_seg.SetExons()) {
00945             CSpliced_exon& exon = **exon_it;
00946             exon.SetProduct_start().SetNucpos() = exon_struct_it->prod_from - offset;
00947             exon.SetProduct_end().SetNucpos() = exon_struct_it->prod_to - offset;
00948             ++exon_struct_it;
00949         }
00950         putative_prod_length = exons.back().prod_to - offset + 1;
00951     }
00952     if (mode == eForceProductFrom0 || (int)spliced_seg.GetProduct_length() < putative_prod_length) {
00953         spliced_seg.SetProduct_length(putative_prod_length);
00954     }
00955 
00956     if (cross_the_origin) {
00957         NON_CONST_ITERATE(CSpliced_seg::TExons, exon_it, spliced_seg.SetExons()) {
00958             CSpliced_exon& exon = **exon_it;
00959             if (exon.GetGenomic_start() >= genomic_size)
00960                 exon.SetGenomic_start() -= genomic_size;
00961             if (exon.GetGenomic_end() >= genomic_size)
00962                 exon.SetGenomic_end() -= genomic_size;
00963         }
00964     }
00965 
00966     if (GetExons(*align) != orig_exons) {
00967         ClearScores(*align);
00968     }
00969 
00970     return align;
00971 }
00972 
00973 CMappedFeat GetCdsOnMrna(const objects::CSeq_id& rna_id, CScope& scope)
00974 {
00975     CMappedFeat cdregion_feat;
00976     CBioseq_Handle handle = scope.GetBioseqHandle(rna_id);
00977     if (handle) {
00978         CFeat_CI feat_iter(handle, CSeqFeatData::eSubtype_cdregion);
00979         if (feat_iter  &&  feat_iter.GetSize()) {
00980             cdregion_feat = *feat_iter;
00981             const CSeq_loc& cds_loc = cdregion_feat.GetLocation();
00982             const CSeq_id* cds_loc_seq_id  = cds_loc.GetId();
00983             if (cds_loc_seq_id == NULL || !sequence::IsSameBioseq(*cds_loc_seq_id, rna_id, &scope)) {
00984                 cdregion_feat = CMappedFeat();
00985             }
00986         }
00987     }
00988     return cdregion_feat;
00989 }
00990 
00991 TSignedSeqRange CFeatureGenerator::SImplementation::GetCds(const objects::CSeq_id& rna_id)
00992 {
00993     CMappedFeat cdregion = GetCdsOnMrna(rna_id, *m_scope);
00994     if (!cdregion) {
00995         return TSignedSeqRange();
00996     }
00997 
00998     TSeqRange cds = cdregion.GetLocation().GetTotalRange();
00999 
01000     return TSignedSeqRange(cds.GetFrom(), cds.GetTo());
01001 }
01002 
01003 void CFeatureGenerator::SImplementation::TrimLeftExon(int trim_amount, ETrimSide side,
01004                                                       vector<SExon>::reverse_iterator left_edge,
01005                                                       vector<SExon>::reverse_iterator& exon_it,
01006                                                       CSpliced_seg::TExons::reverse_iterator& spl_exon_it,
01007                                                       ENa_strand product_strand,
01008                                                       ENa_strand genomic_strand)
01009 {
01010     bool is_protein = (*spl_exon_it)->GetProduct_start().IsProtpos();
01011 
01012     while (trim_amount > 0) {
01013         int exon_len = side==eTrimProduct
01014             ? (exon_it->prod_to - exon_it->prod_from + 1)
01015             : (exon_it->genomic_to - exon_it->genomic_from + 1);
01016         if (exon_len <= trim_amount) {
01017             ++exon_it;
01018             ++spl_exon_it;
01019             trim_amount -= exon_len;
01020             if (exon_it == left_edge)
01021                 break;
01022         } else {
01023             (*spl_exon_it)->SetPartial(true);
01024             (*spl_exon_it)->ResetDonor_after_exon();
01025 
01026             int genomic_trim_amount = 0;
01027             int product_trim_amount = 0;
01028 
01029             if ((*spl_exon_it)->CanGetParts() && !(*spl_exon_it)->GetParts().empty()) {
01030                 CSpliced_exon::TParts& parts = (*spl_exon_it)->SetParts();
01031                 CSpliced_exon_Base::TParts::iterator chunk = parts.end();
01032                 while (--chunk, (trim_amount>0 ||
01033                                  (side==eTrimProduct
01034                                   ? (*chunk)->IsGenomic_ins()
01035                                   : (*chunk)->IsProduct_ins()))) {
01036                     int product_chunk_len = 0;
01037                     int genomic_chunk_len = 0;
01038                     switch((*chunk)->Which()) {
01039                     case CSpliced_exon_chunk::e_Match:
01040                         product_chunk_len = (*chunk)->GetMatch();
01041                         genomic_chunk_len = product_chunk_len;
01042                         if (product_chunk_len > trim_amount) {
01043                             (*chunk)->SetMatch(product_chunk_len - trim_amount);
01044                         }
01045                         break;
01046                     case CSpliced_exon_chunk::e_Mismatch:
01047                         product_chunk_len = (*chunk)->GetMismatch();
01048                         genomic_chunk_len = product_chunk_len;
01049                         if (product_chunk_len > trim_amount) {
01050                             (*chunk)->SetMismatch(product_chunk_len - trim_amount);
01051                         }
01052                         break;
01053                     case CSpliced_exon_chunk::e_Diag:
01054                         product_chunk_len = (*chunk)->GetDiag();
01055                         genomic_chunk_len = product_chunk_len;
01056                         if (product_chunk_len > trim_amount) {
01057                             (*chunk)->SetDiag(product_chunk_len - trim_amount);
01058                         }
01059                         break;
01060                         
01061                     case CSpliced_exon_chunk::e_Product_ins:
01062                         product_chunk_len = (*chunk)->GetProduct_ins();
01063                         if (side==eTrimProduct && product_chunk_len > trim_amount) {
01064                             (*chunk)->SetProduct_ins(product_chunk_len - trim_amount);
01065                         }
01066                         break;
01067                     case CSpliced_exon_chunk::e_Genomic_ins:
01068                         genomic_chunk_len = (*chunk)->GetGenomic_ins();
01069                         if (side==eTrimGenomic && genomic_chunk_len > trim_amount) {
01070                             (*chunk)->SetGenomic_ins(genomic_chunk_len - trim_amount);
01071                         }
01072                         break;
01073                     default:
01074                         _ASSERT(false);
01075                         break;
01076                     }
01077                     
01078                     if (side==eTrimProduct && product_chunk_len <= trim_amount) {
01079                         genomic_trim_amount += genomic_chunk_len;
01080                         product_trim_amount += product_chunk_len;
01081                         trim_amount -= product_chunk_len;
01082                     } else if (side==eTrimGenomic && genomic_chunk_len <= trim_amount) {
01083                         genomic_trim_amount += genomic_chunk_len;
01084                         product_trim_amount += product_chunk_len;
01085                         trim_amount -= genomic_chunk_len;
01086                     } else {
01087                         genomic_trim_amount += min(trim_amount, genomic_chunk_len);
01088                         product_trim_amount += min(trim_amount, product_chunk_len);
01089                         trim_amount = 0;
01090                         break;
01091                     }
01092                     chunk = parts.erase(chunk);
01093                 }
01094                 
01095             } else {
01096                 genomic_trim_amount += trim_amount;
01097                 product_trim_amount += trim_amount;
01098                 trim_amount = 0;
01099             }
01100             
01101             exon_it->prod_to -= product_trim_amount;
01102             exon_it->genomic_to -= genomic_trim_amount;
01103 
01104             if (is_protein) {
01105                 CProduct_pos& prot_pos = (*spl_exon_it)->SetProduct_end();
01106                 SetProtpos(prot_pos, exon_it->prod_to);
01107             } else {
01108                 if (product_strand != eNa_strand_minus) {
01109                     (*spl_exon_it)->SetProduct_end().SetNucpos() -= product_trim_amount;
01110                 } else {
01111                     (*spl_exon_it)->SetProduct_start().SetNucpos() += product_trim_amount;
01112                 }
01113             }
01114 
01115             if (genomic_strand != eNa_strand_minus) {
01116                 (*spl_exon_it)->SetGenomic_end() -= genomic_trim_amount;
01117             } else {
01118                 (*spl_exon_it)->SetGenomic_start() += genomic_trim_amount;
01119             }
01120         }
01121     }
01122 }
01123 void CFeatureGenerator::SImplementation::TrimRightExon(int trim_amount, ETrimSide side,
01124                                                        vector<SExon>::iterator& exon_it,
01125                                                        vector<SExon>::iterator right_edge,
01126                                                        CSpliced_seg::TExons::iterator& spl_exon_it,
01127                                                        ENa_strand product_strand,
01128                                                        ENa_strand genomic_strand)
01129 {
01130     bool is_protein = (*spl_exon_it)->GetProduct_start().IsProtpos();
01131 
01132     while (trim_amount > 0) {
01133         int exon_len = side==eTrimProduct
01134             ? (exon_it->prod_to - exon_it->prod_from + 1)
01135             : (exon_it->genomic_to - exon_it->genomic_from + 1);
01136         if (exon_len <= trim_amount) {
01137             ++exon_it;
01138             ++spl_exon_it;
01139             trim_amount -= exon_len;
01140             if (exon_it == right_edge)
01141                 break;
01142         } else {
01143             (*spl_exon_it)->SetPartial(true);
01144             (*spl_exon_it)->ResetAcceptor_before_exon();
01145 
01146             int genomic_trim_amount = 0;
01147             int product_trim_amount = 0;
01148 
01149             if ((*spl_exon_it)->CanGetParts() && !(*spl_exon_it)->GetParts().empty()) {
01150                 CSpliced_exon::TParts& parts = (*spl_exon_it)->SetParts();
01151                 CSpliced_exon_Base::TParts::iterator chunk = parts.begin();
01152                 for (; trim_amount>0 ||
01153                          (side==eTrimProduct
01154                           ? (*chunk)->IsGenomic_ins()
01155                           : (*chunk)->IsProduct_ins());
01156                      ) {
01157                     int product_chunk_len = 0;
01158                     int genomic_chunk_len = 0;
01159                     switch((*chunk)->Which()) {
01160                     case CSpliced_exon_chunk::e_Match:
01161                         product_chunk_len = (*chunk)->GetMatch();
01162                         genomic_chunk_len = product_chunk_len;
01163                         if (product_chunk_len > trim_amount) {
01164                             (*chunk)->SetMatch(product_chunk_len - trim_amount);
01165                         }
01166                         break;
01167                     case CSpliced_exon_chunk::e_Mismatch:
01168                         product_chunk_len = (*chunk)->GetMismatch();
01169                         genomic_chunk_len = product_chunk_len;
01170                         if (product_chunk_len > trim_amount) {
01171                             (*chunk)->SetMismatch(product_chunk_len - trim_amount);
01172                         }
01173                         break;
01174                     case CSpliced_exon_chunk::e_Diag:
01175                         product_chunk_len = (*chunk)->GetDiag();
01176                         genomic_chunk_len = product_chunk_len;
01177                         if (product_chunk_len > trim_amount) {
01178                             (*chunk)->SetDiag(product_chunk_len - trim_amount);
01179                         }
01180                         break;
01181                         
01182                     case CSpliced_exon_chunk::e_Product_ins:
01183                         product_chunk_len = (*chunk)->GetProduct_ins();
01184                         if (side==eTrimProduct && product_chunk_len > trim_amount) {
01185                             (*chunk)->SetProduct_ins(product_chunk_len - trim_amount);
01186                         }
01187                         break;
01188                     case CSpliced_exon_chunk::e_Genomic_ins:
01189                         genomic_chunk_len = (*chunk)->GetGenomic_ins();
01190                         if (side==eTrimGenomic && genomic_chunk_len > trim_amount) {
01191                             (*chunk)->SetGenomic_ins(genomic_chunk_len - trim_amount);
01192                         }
01193                         break;
01194                     default:
01195                         _ASSERT(false);
01196                         break;
01197                     }
01198                     
01199                     if (side==eTrimProduct && product_chunk_len <= trim_amount) {
01200                         genomic_trim_amount += genomic_chunk_len;
01201                         product_trim_amount += product_chunk_len;
01202                         trim_amount -= product_chunk_len;
01203                     } else if (side==eTrimGenomic && genomic_chunk_len <= trim_amount) {
01204                         genomic_trim_amount += genomic_chunk_len;
01205                         product_trim_amount += product_chunk_len;
01206                         trim_amount -= genomic_chunk_len;
01207                     } else {
01208                         genomic_trim_amount += min(trim_amount, genomic_chunk_len);
01209                         product_trim_amount += min(trim_amount, product_chunk_len);
01210                         trim_amount = 0;
01211                         break;
01212                     }
01213                     chunk = parts.erase(chunk);
01214                 }
01215                 
01216             } else {
01217                 genomic_trim_amount += trim_amount;
01218                 product_trim_amount += trim_amount;
01219                 trim_amount = 0;
01220             }
01221             
01222             exon_it->prod_from += product_trim_amount;
01223             exon_it->genomic_from += genomic_trim_amount;
01224 
01225             if (is_protein) {
01226                 CProduct_pos& prot_pos = (*spl_exon_it)->SetProduct_start();
01227                 SetProtpos(prot_pos, exon_it->prod_from);
01228             } else {
01229                 if (product_strand != eNa_strand_minus) {
01230                     (*spl_exon_it)->SetProduct_start().SetNucpos() += product_trim_amount;
01231                 } else {
01232                     (*spl_exon_it)->SetProduct_end().SetNucpos() -= product_trim_amount;
01233                 }
01234             }
01235 
01236             if (genomic_strand != eNa_strand_minus) {
01237                 (*spl_exon_it)->SetGenomic_start() += genomic_trim_amount;
01238             } else {
01239                 (*spl_exon_it)->SetGenomic_end() -= genomic_trim_amount;
01240             }
01241         }
01242     }
01243 }
01244 END_NCBI_SCOPE
Modified on Tue Jul 22 17:50:49 2014 by modify_doxy.py rev. 426318