00001 #ifndef ALGO_ALIGN_UTIL_HITFILTER__HPP
00002 #define ALGO_ALIGN_UTIL_HITFILTER__HPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <algo/align/util/hit_comparator.hpp>
00036
00037 #include <algorithm>
00038 #include <numeric>
00039 #include <vector>
00040 #include <set>
00041
00042
00043
00044
00045
00046
00047
00048
00049 BEGIN_NCBI_SCOPE
00050
00051
00052
00053
00054 template<class THit>
00055 class CHitCoverageAccumulator
00056 {
00057 public:
00058
00059 typedef CRef<THit> THitRef;
00060 typedef typename THit::TCoord TCoord;
00061
00062
00063
00064
00065
00066 CHitCoverageAccumulator(Uint1 where):
00067 m_Where(where),
00068 m_Finish(0)
00069 {
00070 if(m_Where == 0) {
00071 m_i1 = 0;
00072 m_i2 = 1;
00073 }
00074 else {
00075 m_i1 = 2;
00076 m_i2 = 3;
00077 }
00078 }
00079
00080
00081
00082
00083
00084
00085
00086 TCoord operator() (TCoord iVal, const THitRef& ph)
00087 {
00088 const TCoord box [] = { ph->GetQueryMin(), ph->GetQueryMax(),
00089 ph->GetSubjMin(), ph->GetSubjMax() };
00090
00091 if(box[m_i1] > m_Finish || m_Finish == 0) {
00092 return iVal + (m_Finish = box[m_i2]) - box[m_i1] + 1;
00093 }
00094 else {
00095 TCoord Finish0 = m_Finish;
00096 return (box[m_i2] > Finish0)?
00097 (iVal + (m_Finish = box[m_i2]) - Finish0): iVal;
00098 }
00099 }
00100
00101 private:
00102
00103 Uint1 m_Where;
00104 TCoord m_Finish;
00105 Uint1 m_i1, m_i2;
00106 };
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119 template<class THit>
00120 class CHitFilter: public CObject
00121 {
00122 public:
00123
00124 typedef CRef<THit> THitRef;
00125 typedef vector<THitRef> THitRefs;
00126 typedef typename THit::TCoord TCoord;
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139 static TCoord s_GetCoverage(Uint1 where,
00140 typename THitRefs::const_iterator from,
00141 typename THitRefs::const_iterator to)
00142 {
00143
00144 THitRefs hitrefs (to - from);
00145 typedef typename THitRefs::const_iterator TCI;
00146 typedef typename THitRefs::iterator TI;
00147 TCI ii = from;
00148 TI jj = hitrefs.begin();
00149 while(ii != to) {
00150 *jj++ = *ii++;
00151 }
00152
00153
00154 typedef CHitComparator<THit> THitComparator;
00155 typename THitComparator::ESortCriterion sort_type (
00156 where == 0?
00157 THitComparator::eQueryMin:
00158 THitComparator::eSubjMin);
00159
00160 THitComparator sorter (sort_type);
00161 stable_sort(hitrefs.begin(), hitrefs.end(), sorter);
00162
00163
00164 return accumulate(hitrefs.begin(), hitrefs.end(),
00165 TCoord(0),
00166 CHitCoverageAccumulator<THit>(where));
00167 }
00168
00169
00170
00171
00172
00173
00174
00175 static void s_GetSpan(const THitRefs& hitrefs, TCoord span [4])
00176 {
00177 span[0] = span[2] = kMax_UInt;
00178 span[1] = span[3] = 0;
00179
00180 for(typename THitRefs::const_iterator ii = hitrefs.begin(),
00181 ii_end = hitrefs.end(); ii != ii_end; ++ii) {
00182
00183 TCoord x = (*ii)->GetQueryMin();
00184 if(span[0] > x) {
00185 span[0] = x;
00186 }
00187 x = (*ii)->GetSubjMin();
00188 if(span[2] > x) {
00189 span[2] = x;
00190 }
00191 x = (*ii)->GetQueryMax();
00192 if(span[1] < x) {
00193 span[1] = x;
00194 }
00195 x = (*ii)->GetSubjMax();
00196 if(span[3] < x) {
00197 span[3] = x;
00198 }
00199 }
00200 }
00201
00202 static TCoord s_GetOverlap(TCoord L1, TCoord R1, TCoord L2, TCoord R2)
00203 {
00204 const TCoord minmax = min(R1, R2);
00205 const TCoord maxmin = max(L1, L2);
00206 const int n = minmax - maxmin + 1;
00207 return n > 0? n: 0;
00208 }
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228 enum EUnique_type {
00229 e_Strict,
00230 e_Query,
00231 e_Subject
00232 };
00233
00234 static void s_RunGreedy(typename THitRefs::iterator hri_beg,
00235 typename THitRefs::iterator hri_end,
00236 THitRefs* phits_new,
00237 TCoord min_hit_len = 100,
00238 double min_hit_idty = .9,
00239 TCoord margin = 1,
00240 TCoord retain_overlap = 0,
00241 EUnique_type unique_type = e_Strict)
00242 {
00243
00244 int max_idx = 2;
00245 int min_idx = 0;
00246 switch (unique_type) {
00247 case e_Strict:
00248 break;
00249 case e_Query:
00250 max_idx = 1;
00251 break;
00252 case e_Subject:
00253 min_idx = 1;
00254 break;
00255 }
00256
00257 if(hri_beg > hri_end) {
00258 NCBI_THROW(CAlgoAlignUtilException, eInternal,
00259 "Invalid input iterator order");
00260 }
00261
00262 const size_t dim0 = hri_end - hri_beg;
00263 THitRefs all;
00264
00265 size_t dim_factor = 2;
00266 bool restart = true;
00267
00268 vector<bool> del;
00269
00270 while(restart) {
00271
00272 restart = false;
00273
00274 all.resize(dim0);
00275 copy(hri_beg, hri_end, all.begin());
00276 all.reserve(dim_factor * dim0);
00277
00278 const typename THitRefs::iterator all_beg = all.begin();
00279
00280
00281 typename THitRefs::iterator ii = all_beg;
00282 THitEnds hit_ends;
00283 for(size_t i = 0; i < dim0; ++i, ++ii) {
00284
00285 THitRef& h = *ii;
00286 for(Uint1 point = 0; point < 4; ++point) {
00287 THitEnd he;
00288 he.m_Point = point;
00289 he.m_Ptr = &h;
00290 he.m_X = h->GetBox()[point];
00291 hit_ends.insert(he);
00292 }
00293 }
00294
00295
00296
00297 THitEnds hit_mids;
00298 typename THit::TId id;
00299 id = (*(hit_ends.begin()->m_Ptr))->GetId(hit_ends.begin()->m_Point/2);
00300 typedef pair<THitRef*, Uint1> THitEndInfo;
00301 typedef set<THitEndInfo> TOpenHits;
00302 TOpenHits open_hits;
00303 open_hits.insert(
00304 THitEndInfo(hit_ends.begin()->m_Ptr,hit_ends.begin()->m_Point/2));
00305 ITERATE(typename THitEnds, ii, hit_ends) {
00306
00307 const Uint1 where = ii->m_Point/2;
00308 THitRef* hitrefptr = ii->m_Ptr;
00309 if((*hitrefptr)->GetId(where)->CompareOrdered(*id) != 0) {
00310
00311 open_hits.clear();
00312 open_hits.insert(THitEndInfo(hitrefptr,where));
00313 id = (*hitrefptr)->GetId(where);
00314 }
00315 else {
00316 typename TOpenHits::const_iterator iise = open_hits.end();
00317 typename TOpenHits::iterator iis
00318 = open_hits.find(THitEndInfo(hitrefptr,where));
00319 if(iis == iise) {
00320
00321 open_hits.insert(THitEndInfo(hitrefptr,where));
00322 }
00323 else {
00324
00325 open_hits.erase(iis);
00326 const float curscore = (*hitrefptr)->GetScore();
00327 ITERATE(typename TOpenHits, iis2, open_hits) {
00328
00329 bool hd_score (((*(iis2->first))->GetScore() < curscore));
00330
00331
00332
00333
00334
00335
00336 bool hd_xl (margin + (*(iis2->first))->GetMin(iis2->second)
00337 < (*hitrefptr)->GetMin(where));
00338 bool hd_xr (margin + (*hitrefptr)->GetMax(where)
00339 < (*(iis2->first))->GetMax(iis2->second));
00340 if(hd_score && hd_xl && hd_xr) {
00341
00342 THitEnd hm;
00343 hm.m_Point = iis2->second * 2;
00344 hm.m_Ptr = iis2->first;
00345 hm.m_X = ( (*hitrefptr)->GetStart(where)
00346 + (*hitrefptr)->GetStop(where) ) / 2;
00347 hit_mids.insert(hm);
00348 }
00349 }
00350 }
00351 }
00352 }
00353
00354 hit_ends.insert(hit_mids.begin(), hit_mids.end());
00355
00356 vector<bool> skip (dim0, false);
00357 skip.reserve(dim_factor * dim0);
00358 del.assign(dim0, false);
00359 del.reserve(dim_factor * dim0);
00360
00361 const THitRef* hitref_firstptr = &(*all_beg);
00362 size_t dim = dim0;
00363 while(restart == false) {
00364
00365
00366 double score_best = 0;
00367 typename THitRefs::iterator ii = all_beg, ii_best = all.end();
00368 for(size_t i = 0; i < dim; ++i, ++ii) {
00369 if(skip[i] == false) {
00370 const double score = (*ii)->GetScore();
00371 if(score > score_best) {
00372 ii_best = ii;
00373 score_best = score;
00374 }
00375 }
00376 }
00377
00378 if(ii_best == all.end()) {
00379 break;
00380 }
00381
00382
00383 const THitRef& hc = *ii_best;
00384 const bool is_constraint = hc->GetScore() > 1e20;
00385 THitEnd he [4];
00386 for(Uint1 point = 0; point < 4; ++point) {
00387
00388 he[point].m_Point = point;
00389 he[point].m_Ptr = const_cast<THitRef*>(&hc);
00390 if(is_constraint == false) {
00391 he[point].m_X = hc->GetBox()[point];
00392 }
00393 else {
00394 const TCoord a = 0, b = numeric_limits<TCoord>::max() / 2;
00395 const bool is_start = point % 2 == 0;
00396 const bool is_plus = hc->GetStrand(point / 2);
00397 he[point].m_X = (is_start ^ is_plus)? b: a;
00398 }
00399 }
00400
00401 for(Uint1 where = min_idx; where < max_idx; ++where) {
00402
00403 const TCoord cmin = hc->GetMin(where);
00404 const TCoord cmax = hc->GetMax(where);
00405
00406 THitEnd *phe_lo, *phe_hi;
00407 const Uint1 i1 = 2*where, i2 = i1 + 1;
00408 if(hc->GetStrand(where)) {
00409 phe_lo = &he[i1];
00410 phe_hi = &he[i2];
00411 }
00412 else {
00413 phe_lo = &he[i2];
00414 phe_hi = &he[i1];
00415 }
00416
00417 if(phe_lo->m_X >= margin) {
00418 phe_lo->m_X -= margin;
00419 }
00420 else {
00421 phe_lo->m_X = 0;
00422 }
00423
00424 phe_hi->m_X += margin;
00425
00426 typedef typename THitEnds::iterator THitEndsIter;
00427 THitEndsIter ii0 (hit_ends.lower_bound(*phe_lo));
00428 THitEndsIter ii1 (hit_ends.upper_bound(*phe_hi));
00429
00430
00431 if(phe_lo->m_X == 0) {
00432 THitRef hitref (*(ii0->m_Ptr));
00433 const typename THit::TId& id (hitref->GetId(ii0->m_Point/2));
00434 for(THitEndsIter ii_start(hit_ends.begin());
00435 ii0 != ii_start; --ii0)
00436 {
00437 THitRef hr (*(ii0->m_Ptr));
00438 const typename THit::TId & id2 (hr->GetId(ii0->m_Point/2));
00439 if(0 != id->CompareOrdered(*id2)) {
00440 ++ii0;
00441 break;
00442 }
00443 }
00444 }
00445
00446 for(typename THitEnds::iterator ii (ii0); ii != ii1; ++ii) {
00447
00448 const THitEnd& he = *ii;
00449
00450 const size_t hitrefidx = he.m_Ptr - hitref_firstptr;
00451 const bool alive = skip[hitrefidx] == false;
00452 const bool self = he.m_Ptr == &hc;
00453
00454 if(alive && !self) {
00455
00456 THitRef hit_new;
00457 int newpos
00458 = sx_Cleave(*he.m_Ptr, he.m_Point/2,
00459 cmin, cmax, min_hit_len,
00460 min_hit_idty, & hit_new,
00461 retain_overlap);
00462
00463 if(newpos <= -2) {
00464 del[hitrefidx] = skip[hitrefidx] = true;
00465 }
00466
00467 if(newpos >= 0) {
00468
00469 const TCoord* box = (*he.m_Ptr)->GetBox();
00470
00471 THitEnd he2;
00472 he2.m_Point = he.m_Point;
00473 he2.m_Ptr = he.m_Ptr;
00474 he2.m_X = box[he2.m_Point];
00475 hit_ends.insert(he2);
00476
00477 THitEnd he3;
00478 he3.m_Point = (he.m_Point + 2) % 4;
00479 he3.m_Ptr = he.m_Ptr;
00480 he3.m_X = box[he3.m_Point];
00481 hit_ends.insert(he3);
00482
00483 if(hit_new.NotEmpty()) {
00484
00485 if(++dim > 2 * dim0) {
00486 dim_factor *= 2;
00487 restart = true;
00488 break;
00489 }
00490
00491 all.push_back(hit_new);
00492 THitRef* ptr = & all.back();
00493 del.push_back(false);
00494 skip.push_back(false);
00495
00496 Uint1 point = he.m_Point;
00497 Uint1 where = point / 2;
00498
00499
00500 THitEnd he2;
00501 he2.m_Point = point;
00502 he2.m_Ptr = ptr;
00503 he2.m_X = (box[point]== hit_new->GetStart(where))?
00504 hit_new->GetStop(where):
00505 hit_new->GetStart(where);
00506 hit_ends.insert(he2);
00507
00508 point = (he.m_Point + 2) % 4;
00509 where = point / 2;
00510
00511 THitEnd he3;
00512 he3.m_Point = point;
00513 he3.m_Ptr = ptr;
00514 he3.m_X = (box[point]==hit_new->GetStart(where))?
00515 hit_new->GetStop(where):
00516 hit_new->GetStart(where);
00517 hit_ends.insert(he3);
00518
00519
00520
00521
00522 }
00523 }
00524 }
00525 }
00526 }
00527
00528 skip[&hc - hitref_firstptr] = true;
00529 }
00530 }
00531
00532
00533 typename THitRefs::iterator all_beg = all.begin(), ii = all_beg;
00534 const size_t dim = all.size();
00535 for(size_t i = 0; i < dim; ++i, ++ii) {
00536 if(del[i]) {
00537 ii->Reset(NULL);
00538 }
00539 }
00540
00541
00542
00543 typename THitRefs::iterator idst = hri_beg, isrc = all_beg,
00544 isrc_end = all.end();
00545 while(isrc != isrc_end && idst != hri_end) {
00546 if(isrc->NotEmpty()) {
00547 *idst++ = *isrc;
00548 }
00549 ++isrc;
00550 }
00551
00552
00553 while(idst != hri_end) {
00554 idst++ -> Reset(NULL);
00555 }
00556
00557
00558 phits_new->resize(0);
00559 while(isrc != isrc_end) {
00560 if(isrc->NotEmpty()) {
00561 phits_new->push_back(*isrc);
00562 }
00563 ++isrc;
00564 }
00565 }
00566
00567
00568
00569
00570
00571 static void s_MergeAbutting(typename THitRefs::iterator hri_beg,
00572 typename THitRefs::iterator hri_end,
00573 const double& maxlenfr,
00574 THitRefs* pout)
00575 {
00576 if(hri_beg > hri_end) {
00577 NCBI_THROW(CAlgoAlignUtilException, eInternal,
00578 "Invalid input iterator order");
00579 }
00580
00581
00582 const size_t dim0 = hri_end - hri_beg;
00583 THitRefs all;
00584 all.reserve(dim0);
00585 for(typename THitRefs::iterator hri = hri_beg; hri != hri_end; ++hri) {
00586 if((*hri)->GetQueryStrand() == false) {
00587 (*hri)->FlipStrands();
00588 }
00589 all.push_back(*hri);
00590 }
00591
00592
00593
00594 typename THitRefs::iterator ii1 = all.begin();
00595 typename THitRefs::iterator ii2 = all.end(); --ii2;
00596 THitEnds hit_ends;
00597 while(ii1 != ii2) {
00598
00599 THitRef& h1 = *ii1;
00600 THitRef& h2 = *ii2;
00601 for(Uint1 point = 0; point < 4; ++point) {
00602
00603 THitEnd he1;
00604 he1.m_Point = point;
00605 he1.m_Ptr = &h1;
00606 he1.m_X = h1->GetBox()[point];
00607 hit_ends.insert(he1);
00608
00609 THitEnd he2;
00610 he2.m_Point = point;
00611 he2.m_Ptr = &h2;
00612 he2.m_X = h2->GetBox()[point];
00613 hit_ends.insert(he2);
00614 }
00615
00616 if(++ii1 == ii2) {
00617 --ii2;
00618 break;
00619 }
00620 --ii2;
00621 }
00622
00623 if(ii1 == ii2) {
00624 THitRef& h = *ii2;
00625 for(Uint1 point = 0; point < 4; ++point) {
00626 THitEnd he;
00627 he.m_Point = point;
00628 he.m_Ptr = &h;
00629 he.m_X = h->GetBox()[point];
00630 hit_ends.insert(he);
00631 }
00632 }
00633
00634
00635 typedef CHitComparator<THit> THitComparator;
00636 typename THitComparator::ESortCriterion sort_type
00637 (THitComparator::eQueryIdSubjIdSubjStrand);
00638 THitComparator sorter (sort_type);
00639 stable_sort(all.begin(), all.end(), sorter);
00640
00641
00642 pout->clear();
00643
00644 THitRef h0 (all.front());
00645 typename THitRefs::iterator ib = all.begin();
00646 NON_CONST_ITERATE(typename THitRefs, ii, all) {
00647
00648 if ( ! (*ii)->GetQueryId()->Match(*(h0->GetQueryId())) ||
00649 ! (*ii)->GetSubjId()->Match(*(h0->GetSubjId())) ||
00650 (*ii)->GetSubjStrand() != h0->GetSubjStrand() )
00651 {
00652 h0 = *ii;
00653 sx_TestAndMerge(ib, ii, hit_ends, maxlenfr, pout);
00654 ib = ii;
00655 }
00656 }
00657 sx_TestAndMerge(ib, all.end(), hit_ends, maxlenfr, pout);
00658 }
00659
00660
00661 static bool s_PNullRef(const THitRef& hr) {
00662 return hr.IsNull();
00663 }
00664
00665 protected:
00666
00667 struct SHitEnd {
00668
00669 Uint1 m_Point;
00670 THitRef* m_Ptr;
00671 TCoord m_X;
00672
00673 bool operator < (const SHitEnd& rhs) const {
00674
00675 const Uint1 where1 = m_Point < 2? 0: 1;
00676 const Uint1 where2 = rhs.m_Point < 2? 0: 1;
00677 const THit& h1 = **m_Ptr;
00678 const THit& h2 = **rhs.m_Ptr;
00679 int c = h1.GetId(where1)->CompareOrdered(*h2.GetId(where2));
00680 return c < 0? true: (c > 0? false:
00681 (m_X == rhs.m_X? (h1.GetScore() < h2.GetScore()): m_X < rhs.m_X) );
00682 }
00683
00684 friend ostream& operator << (ostream& ostr, const SHitEnd& he) {
00685 return
00686 ostr << "(Point,Coord) = ("
00687 << int(he.m_Point) << ", "
00688 << he.m_X << ")\n"
00689 << **he.m_Ptr;
00690 }
00691 };
00692
00693 typedef SHitEnd THitEnd;
00694 typedef multiset<THitEnd> THitEnds;
00695
00696
00697
00698 static int sx_Cleave(THitRef& h, Uint1 where,
00699 TCoord cmin, TCoord cmax,
00700 TCoord min_hit_len, double& min_idty,
00701 THitRef* pnew_hit,
00702 TCoord retain_overlap)
00703 {
00704 int rv = -1;
00705 pnew_hit->Reset(NULL);
00706
00707 try {
00708 TCoord lmin = h->GetMin(where), lmax = h->GetMax(where);
00709
00710 if(retain_overlap > 0) {
00711 const TCoord overlap = s_GetOverlap(lmin, lmax, cmin, cmax);
00712 if(overlap >= retain_overlap) {
00713 return -1;
00714 }
00715 }
00716
00717 if(cmin <= lmin && lmax <= cmax) {
00718 return -2;
00719 }
00720
00721 int ldif = cmin - lmin, rdif = lmax - cmax;
00722
00723 if(ldif > int(min_hit_len) && rdif > int(min_hit_len)) {
00724
00725
00726 pnew_hit->Reset(new THit (*h));
00727 h->Modify(2*where + 1, lmax = cmin - 1);
00728 (*pnew_hit)->Modify(2*where, lmin = cmax + 1);
00729 if((*pnew_hit)->GetIdentity() < min_idty) {
00730 pnew_hit->Reset(NULL);
00731 }
00732 return (h->GetIdentity() < min_idty)? -2: 0;
00733 }
00734 else if(ldif > rdif) {
00735
00736 if(lmin <= cmin && cmin <= lmax) {
00737 if(cmin == 0) return -2;
00738 h->Modify(2*where + 1, lmax = cmin - 1);
00739 rv = lmax;
00740 }
00741 if(lmin <= cmax && cmax <= lmax) {
00742 h->Modify(2*where, lmin = cmax + 1);
00743 rv = lmin;
00744 }
00745 }
00746 else {
00747
00748 if(lmin <= cmax && cmax <= lmax) {
00749 h->Modify(2*where, lmin = cmax + 1);
00750 rv = lmin;
00751 }
00752 if(lmin <= cmin && cmin <= lmax) {
00753 if(cmin == 0) return -2;
00754 h->Modify(2*where + 1, lmax = cmin - 1);
00755 rv = lmax;
00756 }
00757
00758 }
00759
00760 if(1 + lmax - lmin < min_hit_len || h->GetIdentity() < min_idty) {
00761 return -2;
00762 }
00763 }
00764 catch(CAlgoAlignUtilException&) {
00765 rv = -3;
00766 }
00767 return rv;
00768 }
00769
00770
00771 static THitRef sx_Merge(const THitRef& lhs, const THitRef& rhs, Uint1 where)
00772 {
00773 THitRef rv (0);
00774
00775 const bool sstrand (lhs->GetSubjStrand());
00776 if(sstrand != rhs->GetSubjStrand()) {
00777
00778 NCBI_THROW(CAlgoAlignUtilException, eInternal,
00779 "Cannot merge hits: different strands");
00780 }
00781
00782 const int qgap ((!sstrand && where == 1)?
00783 (int(lhs->GetQueryMin()) - int(rhs->GetQueryMax())):
00784 (int(rhs->GetQueryMin()) - int(lhs->GetQueryMax())));
00785
00786 const int sgap ((!sstrand && where == 0)?
00787 (int(lhs->GetSubjMin()) - int(rhs->GetSubjMax())):
00788 (int(rhs->GetSubjMin()) - int(lhs->GetSubjMax())));
00789
00790 const bool bv1 (abs(qgap) == 1 && abs(sgap) > 0);
00791 const bool bv2 (abs(sgap) == 1 && abs(qgap) > 0);
00792
00793 if(!bv1 && !bv2){
00794 return rv;
00795 }
00796
00797 typename THit::TTranscript x_lhs =
00798 THit::s_RunLengthDecode(lhs->GetTranscript());
00799 typename THit::TTranscript x_rhs =
00800 THit::s_RunLengthDecode(rhs->GetTranscript());
00801
00802 if(x_lhs.size() && x_rhs.size()) {
00803
00804 string x_gap;
00805 if(where == 0) {
00806 x_gap.assign(abs(sgap) - 1, 'I');
00807 }
00808 else {
00809 x_gap.assign(abs(qgap) - 1, 'D');
00810 }
00811
00812 string xcript;
00813 THitRef h;
00814 if(where == 0 || sstrand) {
00815 xcript = x_lhs + x_gap + x_rhs;
00816 h = lhs;
00817 }
00818 else {
00819 xcript = x_rhs + x_gap + x_lhs;
00820 h = rhs;
00821 }
00822
00823 rv.Reset (new THit(h->GetId(0), h->GetStart(0), h->GetStrand(0),
00824 h->GetId(1), h->GetStart(1), h->GetStrand(1),
00825 xcript));
00826 }
00827 else {
00828
00829 rv.Reset(new THit());
00830 rv->SetId(0, lhs->GetId(0));
00831 rv->SetId(1, lhs->GetId(1));
00832 TCoord box [4] = {
00833 lhs->GetQueryStart(), rhs->GetQueryStop(),
00834 lhs->GetSubjStart(), rhs->GetSubjStop()
00835 };
00836 rv->SetBox(box);
00837 }
00838
00839 return rv;
00840 }
00841
00842
00843 static bool s_TrackHit(const THit& h)
00844 {
00845 const string xcript (THit::s_RunLengthDecode(h.GetTranscript()));
00846 if(xcript.size()) {
00847
00848 TCoord q = h.GetQueryStart(), q0 = q, s = h.GetSubjStart(), s0 = s;
00849
00850 const int qinc = h.GetQueryStrand()? 1 : -1;
00851 const int sinc = h.GetSubjStrand()? 1 : -1;
00852
00853 ITERATE(string, ii, xcript) {
00854 switch(*ii) {
00855 case 'M' : case 'R' : q0 = q; q += qinc; s0 = s; s += sinc; break;
00856 case 'I' : s0 = s; s += sinc; break;
00857 case 'D' : q0 = q; q += qinc; break;
00858 default:
00859 NCBI_THROW(CAlgoAlignUtilException, eInternal,
00860 "Invalid transcript symbol");
00861 }
00862 }
00863
00864 const bool rv = (q0 == h.GetQueryStop() && s0 == h.GetSubjStop());
00865 return rv;
00866 }
00867 else {
00868 return true;
00869 }
00870 }
00871
00872
00873 static void sx_TM(Uint1 where,
00874 typename THitRefs::iterator ii_beg,
00875 typename THitRefs::iterator ii_end,
00876 const THitEnds& hit_ends,
00877 const double& maxlenfr,
00878 map<typename THitRefs::iterator, THitRef>& m)
00879 {
00880 typedef CHitComparator<THit> THitComparator;
00881 typename THitComparator::ESortCriterion sort_type (
00882 where == 0?
00883 THitComparator::eQueryMinScore:
00884 THitComparator::eSubjMinScore);
00885 THitComparator sorter (sort_type);
00886 stable_sort(ii_beg, ii_end, sorter);
00887
00888 typedef typename THitRefs::iterator TIter;
00889 typedef map<typename THitRefs::iterator, THitRef> TIterMap;
00890 for(TIter ii = ii_beg; ii != ii_end; ++ii) {
00891
00892 TIter jj = ii;
00893 const TCoord cmax = (*ii)->GetMax(where);
00894 TCoord cmin = 0;
00895 for(++jj; jj != ii_end && cmax >= cmin; ++jj) {
00896 cmin = (*jj)->GetMin(where);
00897 }
00898
00899 if(cmax + 1 == cmin) {
00900
00901 --jj;
00902 bool can_merge = true;
00903 const Uint1 where2 ((where + 1) % 2);
00904 const bool subj_strand ((*ii)->GetSubjStrand());
00905 if(subj_strand) {
00906 can_merge = (*ii)->GetMax(where2) < (*jj)->GetMin(where2);
00907 }
00908 else {
00909 can_merge = (*jj)->GetMax(where2) < (*ii)->GetMin(where2);
00910 }
00911
00912 if(can_merge) {
00913
00914
00915
00916 THitEnd he_ii, he_jj;
00917
00918 if(subj_strand) {
00919
00920 he_ii.m_Point = 2*where2 + 1;
00921 he_ii.m_Ptr = &(*ii);
00922 he_ii.m_X = (*ii)->GetStop(where2) + 1;
00923
00924 he_jj.m_Point = 2*where2;
00925 he_jj.m_Ptr = &(*jj);
00926 he_jj.m_X = (*jj)->GetStart(where2) - 1;
00927 }
00928 else {
00929
00930 he_jj.m_Point = 2*where2 + 1;
00931 he_jj.m_Ptr = &(*ii);
00932 he_jj.m_X = (*ii)->GetStop(where2) - 1;
00933
00934 he_ii.m_Point = 2*where2;
00935 he_ii.m_Ptr = &(*jj);
00936 he_ii.m_X = (*jj)->GetStart(where2) + 1;
00937 }
00938
00939 typedef typename THitEnds::const_iterator THitEndsIter;
00940 const THitEndsIter ii0 = hit_ends.lower_bound(he_ii);
00941 const THitEndsIter ii1 = hit_ends.upper_bound(he_jj);
00942
00943 const size_t max_len
00944 = size_t(maxlenfr * min((*ii)->GetLength(),
00945 (*jj)->GetLength())
00946 + 0.5);
00947
00948 for(THitEndsIter ii = ii0; ii != ii1; ++ii) {
00949
00950 const THitRef& h = *(ii->m_Ptr);
00951
00952 TCoord cmin (h->GetMin(where2));
00953 if(cmin < he_ii.m_X) cmin = he_ii.m_X;
00954
00955 TCoord cmax (h->GetMax(where2));
00956 if(cmax < he_jj.m_X) cmax = he_jj.m_X;
00957
00958 if(cmax - cmin + 1 > max_len) {
00959 can_merge = false;
00960 break;
00961 }
00962 }
00963 }
00964
00965 if(can_merge) {
00966
00967
00968
00969 typename TIterMap::iterator imi = m.find(ii);
00970 THitRef lhs (imi == m.end()? *ii: imi->second);
00971 if (m.find(jj) == m.end() && lhs.NotEmpty()) {
00972
00973 THitRef rv (sx_Merge(lhs, *jj, where));
00974 if(rv.NotNull()) {
00975 m[ii] = THitRef(0);
00976 m[jj] = rv;
00977 }
00978 }
00979 }
00980 }
00981 }
00982 }
00983
00984
00985
00986
00987 void static sx_TestAndMerge(typename THitRefs::iterator ii_beg,
00988 typename THitRefs::iterator ii_end,
00989 const THitEnds& hit_ends,
00990 const double& maxlenfr,
00991 THitRefs* pout)
00992 {
00993 typedef typename THitRefs::iterator TIter;
00994 typedef map<TIter, THitRef> TIterMap;
00995 TIterMap m;
00996
00997 sx_TM(0, ii_beg, ii_end, hit_ends, maxlenfr, m);
00998 sx_TM(1, ii_beg, ii_end, hit_ends, maxlenfr, m);
00999
01000
01001 for(TIter ii = ii_beg; ii != ii_end; ++ii) {
01002 typename TIterMap::iterator imi = m.find(ii);
01003 THitRef hr (imi == m.end()? *ii: imi->second);
01004 if(hr.NotEmpty()) {
01005 pout->push_back(hr);
01006 }
01007 }
01008 }
01009
01010 };
01011
01012
01013 END_NCBI_SCOPE
01014
01015
01016
01017
01018 #endif
01019
01020