00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include <ncbi_pch.hpp>
00035 #include <algo/structure/cd_utils/cuCppNCBI.hpp>
00036
00037 #include <objects/seq/Seq_annot.hpp>
00038 #include <objects/seqalign/Seq_align_set.hpp>
00039 #include <objects/seqalign/Dense_diag.hpp>
00040 #include <objects/seqalign/Dense_seg.hpp>
00041 #include <objects/seqalign/Score.hpp>
00042 #include <objects/seqloc/Seq_interval.hpp>
00043 #include <objects/seqloc/Seq_loc.hpp>
00044 #include <objects/seqloc/PDB_seq_id.hpp>
00045 #include <objects/seqloc/PDB_mol_id.hpp>
00046 #include <objects/general/Object_id.hpp>
00047
00048 #include <algo/structure/cd_utils/cuSequence.hpp>
00049 #include <algo/structure/cd_utils/cuUtils.hpp>
00050 #include <algo/structure/cd_utils/cuAlign.hpp>
00051
00052 #include <stdio.h>
00053
00054 BEGIN_NCBI_SCOPE
00055 BEGIN_SCOPE(cd_utils)
00056
00057 bool GetSeqID(const CRef< CSeq_align >& seqAlign, CRef< CSeq_id >& SeqID, bool getSlave)
00058 {
00059
00060
00061
00062
00063 CRef< CDense_diag > DenDiag;
00064 CDense_diag::TIds IdsSet;
00065 CDense_diag::TIds::iterator i;
00066
00067 if (seqAlign.NotEmpty()) {
00068 if (seqAlign->GetSegs().IsDendiag() && GetFirstOrLastDenDiag(seqAlign, true, DenDiag)) {
00069 IdsSet = DenDiag->GetIds();
00070 } else if (seqAlign->GetSegs().IsDenseg()) {
00071 IdsSet = seqAlign->GetSegs().GetDenseg().GetIds();
00072 }
00073 i = IdsSet.begin();
00074 if (getSlave)
00075 {
00076 i++;
00077 }
00078 SeqID = (*i);
00079 return(true);
00080 }
00081 return(false);
00082 }
00083
00084 bool HasSeqID(const CRef< CSeq_align >& seqAlign, const CRef< CSeq_id >& SeqID, bool& isMaster)
00085 {
00086
00087
00088
00089
00090 bool hasMatch = false;
00091 CRef< CDense_diag > DenDiag;
00092 CDense_diag::TIds IdsSet;
00093 CDense_diag::TIds::iterator i;
00094
00095 if (seqAlign.NotEmpty()) {
00096 if (seqAlign->GetSegs().IsDendiag() && GetFirstOrLastDenDiag(seqAlign, true, DenDiag)) {
00097 IdsSet = DenDiag->GetIds();
00098 } else if (seqAlign->GetSegs().IsDenseg()) {
00099 IdsSet = seqAlign->GetSegs().GetDenseg().GetIds();
00100 }
00101 i = IdsSet.begin();
00102 while (!hasMatch && i != IdsSet.end()) {
00103 if (SeqIdsMatch(SeqID, *i)) {
00104 hasMatch = true;
00105 }
00106 ++i;
00107 }
00108 isMaster = (hasMatch && (--i == IdsSet.begin()));
00109 }
00110 return(hasMatch);
00111 }
00112
00113 int SeqAlignRemap(CRef< CSeq_align >& source, int iSeq, CRef< CSeq_align >& guide, int iMaster, CRef< CSeq_align >& mappedAlign, int iMasterNew, int iSeqNew, int flags, string& err) {
00114 int nBlocks = 0;
00115 TDendiag mappedAlignDD;
00116 TDendiag *sourceDD, *guideDD;
00117
00118
00119 err.erase();
00120 if (source.Empty()) {
00121 err = "SeqAlignRemap: Empty alignment in source.\n";
00122 } else if (guide.Empty()) {
00123 err = "SeqAlignRemap: Empty alignment in guide.\n";
00124 } else if (mappedAlign.Empty()) {
00125 err = "SeqAlignRemap: Empty alignment in target mapped alignment.\n";
00126 }
00127 if (err.size() > 0) return nBlocks;
00128
00129 if (source->GetDim() != guide->GetDim()) {
00130 err = "SeqAlignRemap: Inconsistent dimensions for source and guide alignments.\n";
00131
00132
00133 } else if (source->GetSegs().Which() != guide->GetSegs().Which()) {
00134 err = "SeqAlignRemap: Inconsistent segment types for source and guide alignments.\n";
00135 }
00136 if (err.size() > 0) return nBlocks;
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151 if (GetDDSetFromSeqAlign(*source, sourceDD) && GetDDSetFromSeqAlign(*guide, guideDD)) {
00152 mappedAlign->SetType(source->GetType());
00153 mappedAlign->SetDim(source->GetDim());
00154 mappedAlign->SetSegs().Select(source->GetSegs().Which());
00155 nBlocks = ddRemap(sourceDD, iSeq, guideDD, iMaster, &mappedAlignDD, iMasterNew, iSeqNew, flags, err);
00156 for (TDendiag::iterator ddIt = mappedAlignDD.begin(); ddIt != mappedAlignDD.end(); ++ddIt) {
00157 mappedAlign->SetSegs().SetDendiag().push_back(*ddIt);
00158 }
00159 }
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170 return nBlocks;
00171 }
00172
00173
00174 void MakeMaskedSeqAlign(const CRef< CSeq_align >& originalAlign, const CRef< CSeq_align >& maskAlign, CRef< CSeq_align >& maskedAlign, bool useMaskMaster, bool invertMask) {
00175
00176
00177 bool inputOK = true;
00178 bool isAligned, useOriginalMaster;
00179 TSeqPos newMasterStart, newSlaveStart, newLength;
00180 TSeqPos originalMasterStart, originalSlaveStart, originalLength, blockStart;
00181
00182 CRef< CSeq_id > masterId, slaveId, maskId;
00183 const TDendiag* originalDDSet;
00184 TDendiag* maskedDDSet;
00185 TDendiag_cit originalBlock, originalBlock_end;
00186
00187 if (originalAlign.Empty() || !GetSeqID(originalAlign, masterId, false)
00188 || !GetSeqID(originalAlign, slaveId, true)
00189 || !GetDDSetFromSeqAlign(*originalAlign, originalDDSet)) {
00190 inputOK = false;
00191 } else if (maskAlign.Empty() || !GetSeqID(maskAlign, maskId, !useMaskMaster)) {
00192 inputOK = false;
00193 } else if (maskedAlign.Empty()) {
00194 inputOK = false;
00195 }
00196
00197 if (SeqIdsMatch(masterId, maskId)) {
00198 useOriginalMaster = true;
00199 } else if (SeqIdsMatch(slaveId, maskId)) {
00200 useOriginalMaster = false;
00201 } else {
00202 useOriginalMaster = false;
00203 inputOK = false;
00204 }
00205
00206
00207
00208
00209
00210
00211 maskedAlign->SetType(originalAlign->GetType());
00212 maskedAlign->SetDim(originalAlign->GetDim());
00213 maskedAlign->SetSegs().Select(originalAlign->GetSegs().Which());
00214 if (inputOK && GetDDSetFromSeqAlign(*maskedAlign, maskedDDSet)) {
00215
00216
00217 originalBlock_end = originalDDSet->end();
00218 for (originalBlock = originalDDSet->begin(); originalBlock != originalBlock_end; ++originalBlock) {
00219
00220 originalMasterStart = (*originalBlock)->GetStarts().front();
00221 originalSlaveStart = (*originalBlock)->GetStarts().back();
00222 originalLength = (*originalBlock)->GetLen();
00223 newLength = 0;
00224 newMasterStart = 0;
00225 newSlaveStart = 0;
00226
00227
00228
00229
00230
00231
00232 blockStart = (( useOriginalMaster) ? originalMasterStart : originalSlaveStart);
00233 for (TSeqPos blockPos = 0; blockPos < originalLength; ++blockPos) {
00234 isAligned = IsPositionAligned(*maskAlign, blockPos + blockStart, useMaskMaster);
00235 if ((isAligned && !invertMask) || (!isAligned && invertMask)) {
00236 if (newLength == 0) {
00237 newMasterStart = blockPos + blockStart;
00238 newSlaveStart = blockPos + ((!useOriginalMaster) ? originalMasterStart : originalSlaveStart);
00239 }
00240 ++newLength;
00241 } else if (newLength > 0) {
00242 AddIntervalToDD(maskedDDSet, masterId, slaveId, newMasterStart, newSlaveStart, newLength);
00243 newLength = 0;
00244 }
00245 }
00246 if (newLength > 0) {
00247 AddIntervalToDD(maskedDDSet, masterId, slaveId, newMasterStart, newSlaveStart, newLength);
00248 }
00249
00250 }
00251
00252 }
00253
00254 }
00255
00256
00257 bool SeqAlignsAreEquivalent(const CRef< CSeq_align >& align1, const CRef< CSeq_align >& align2, bool checkMasters) {
00258 bool result = false;
00259 const TDendiag* ddSet1;
00260 const TDendiag* ddSet2;
00261
00262 if (GetDDSetFromSeqAlign(*align1, ddSet1) && GetDDSetFromSeqAlign(*align2, ddSet2)) {
00263 result = ddAreEquivalent(ddSet1, ddSet2, checkMasters);
00264 }
00265 return result;
00266 }
00267
00268 void SeqAlignSwapMasterSlave(CRef< CSeq_align >& seqAlign, CRef< CSeq_align >& swappedSeqAlign) {
00269
00270 int result = 0;
00271 TDendiag* originalDDSet;
00272 TDendiag* swappedDDSet;
00273
00274 swappedSeqAlign->Assign(*seqAlign);
00275 if (GetDDSetFromSeqAlign(*seqAlign, originalDDSet) && GetDDSetFromSeqAlign(*swappedSeqAlign, swappedDDSet)) {
00276 swappedDDSet->clear();
00277 result = ddRecompose(originalDDSet, 1, 0, swappedDDSet);
00278 }
00279 }
00280
00281
00282 bool ChangeSeqIdInSeqAlign(CRef< CSeq_align>& sa, const CRef< CSeq_id >& newSeqId, bool onMaster)
00283 {
00284 bool result = (sa->SetSegs().IsDendiag() && sa->SetSegs().SetDendiag().size() > 0);
00285 TDendiag_it ddIt, ddEnd;
00286 unsigned int index = (onMaster) ? 0 : 1;
00287
00288
00289 if (result) {
00290 ddIt = sa->SetSegs().SetDendiag().begin();
00291 ddEnd = sa->SetSegs().SetDendiag().end();
00292 for (; ddIt != ddEnd; ++ddIt) {
00293 if ((*ddIt)->GetDim() != 2 || (*ddIt)->GetIds().size() != 2) {
00294 result = false;
00295 break;
00296 }
00297 }
00298 }
00299
00300 if (result) {
00301 ddIt = sa->SetSegs().SetDendiag().begin();
00302 ddEnd = sa->SetSegs().SetDendiag().end();
00303 CDense_diag::TIds ids;
00304 for (; ddIt != ddEnd; ++ddIt) {
00305 ids = (*ddIt)->SetIds();
00306 ids[index]->Assign(*newSeqId);
00307 }
00308 }
00309
00310 return result;
00311 }
00312
00313
00314
00315 int MapPositionToMaster(int childPos, const CSeq_align& align) {
00316
00317 return MapPosition(align, childPos, CHILD_TO_MASTER);
00318 }
00319
00320
00321 int MapPositionToChild(int masterPos, const CSeq_align& align) {
00322
00323 return MapPosition(align, masterPos, MASTER_TO_CHILD);
00324 }
00325
00326
00327
00328 int MapPosition(const CSeq_align& seqAlign, int Position, CoordMapDir mapDir) {
00329
00330
00331
00332
00333
00334
00335 TDendiag_cit i, ddend;
00336 CDense_diag::TStarts::const_iterator k;
00337 int Start, Len, OtherStart;
00338
00339 const TDendiag* ddlist;
00340 if (GetDDSetFromSeqAlign(seqAlign, ddlist)) {
00341
00342 ddend = ddlist->end();
00343 for (i=ddlist->begin(); i!=ddend; i++) {
00344 k = (*i)->GetStarts().begin();
00345 Len = (*i)->GetLen();
00346 Start = (mapDir == MASTER_TO_CHILD) ? *k : *(++k);
00347
00348 k = (*i)->GetStarts().begin();
00349 OtherStart = (mapDir == MASTER_TO_CHILD) ? *(++k) : *k;
00350
00351 if ((Position >= Start) && (Position < (Start+Len))) {
00352 return(OtherStart + (Position-Start));
00353 }
00354 }
00355 }
00356
00357 return(INVALID_POSITION);
00358 }
00359
00360
00361
00362
00363 bool IsPositionAligned(const CSeq_align& seqAlign, int Position, bool onMaster) {
00364 bool result = false;
00365
00366 if (Position == INVALID_POSITION) {
00367 return result;
00368 }
00369
00370 const TDendiag* pDenDiagSet;
00371 if (GetDDSetFromSeqAlign(seqAlign, pDenDiagSet)) {
00372 result = IsPositionAligned(pDenDiagSet, Position, onMaster);
00373 }
00374 return result;
00375 }
00376
00377
00378 bool IsPositionAligned(const TDendiag*& pDenDiagSet, int Position, bool onMaster) {
00379 bool result = false;
00380 int start, stop;
00381 TDendiag_cit i, iend;
00382
00383 if (Position == INVALID_POSITION) {
00384 return result;
00385 }
00386
00387
00388 if (pDenDiagSet) {
00389 iend = pDenDiagSet->end();
00390 for (i=pDenDiagSet->begin(); i!=iend; i++) {
00391 start = (onMaster) ? (*i)->GetStarts().front() : (*i)->GetStarts().back();
00392 stop = start + (*i)->GetLen() - 1;
00393 if (Position >= start && Position <= stop) {
00394 result = true;
00395 break;
00396 }
00397 }
00398 }
00399 return result;
00400 }
00401
00402
00403 int GetAlignedPositions(const CRef< CSeq_align >& align1, const CRef< CSeq_align >& align2, vector<int>& alignedPositions, bool onMaster) {
00404
00405 int nBlocks, position;
00406 CRef< CSeq_id > align1Id, align2Id;
00407 vector<int> align1Blocks, align1Starts;
00408
00409 alignedPositions.clear();
00410
00411 if (align1.NotEmpty() && align2.NotEmpty()) {
00412
00413
00414 if (GetSeqID(align1, align1Id, !onMaster) && GetSeqID(align2, align2Id, !onMaster) &&
00415 SeqIdsMatch(align1Id, align2Id)) {
00416
00417 GetBlockLengths(align1, align1Blocks);
00418 GetBlockStarts(align1, align1Starts, onMaster);
00419
00420
00421 nBlocks = align1Blocks.size();
00422 for (int i = 0; i < nBlocks; ++i) {
00423 position = align1Starts[i];
00424 for (int j = 0; j < align1Blocks[i]; ++j) {
00425 if (IsPositionAligned(*align2, position, onMaster)) {
00426 alignedPositions.push_back(position);
00427 }
00428 ++position;
00429 }
00430 }
00431 }
00432 }
00433 return alignedPositions.size();
00434 }
00435
00436
00437 int GetNumAlignedResidues(const CRef< CSeq_align >& seqAlign) {
00438
00439 TDendiag_cit i;
00440 int Len=0;
00441
00442 if (seqAlign.Empty()) {
00443 return Len;
00444 }
00445
00446
00447 const TDendiag* pDenDiagSet;
00448 if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00449 for (i=pDenDiagSet->begin(); i!=pDenDiagSet->end(); i++) {
00450 Len += (*i)->GetLen();
00451 }
00452 }
00453 return(Len);
00454
00455 }
00456
00457 int GetLowerBound(const CRef< CSeq_align >& seqAlign, bool onMaster) {
00458
00459 int lowerBound = -1;
00460 if (seqAlign.Empty()) {
00461 return lowerBound;
00462 }
00463
00464 const TDendiag* pDenDiagSet;
00465 if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00466 lowerBound = (onMaster) ? pDenDiagSet->front()->GetStarts().front() : pDenDiagSet->front()->GetStarts().back();
00467 }
00468 return(lowerBound);
00469
00470 }
00471
00472 int GetUpperBound(const CRef< CSeq_align >& seqAlign, bool onMaster) {
00473 int upperBound = -1;
00474 if (seqAlign.Empty()) {
00475 return upperBound;
00476 }
00477
00478 const TDendiag* pDenDiagSet;
00479 if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00480 upperBound = (onMaster) ? pDenDiagSet->back()->GetStarts().front() : pDenDiagSet->back()->GetStarts().back();
00481 upperBound += pDenDiagSet->back()->GetLen() - 1;
00482 }
00483 return(upperBound);
00484
00485 }
00486
00487
00488
00489 void SetAlignedResiduesOnSequence(const CRef< CSeq_align >& align, const string& sequenceString, char*& pAlignedRes, bool isMaster) {
00490
00491 int length;
00492 int alignedResCtr = 0;
00493 int start = -1, stop = -1;
00494 CRef< CDense_diag > ddFirst, ddLast;
00495
00496 if (align.Empty() || sequenceString.size() < 1) {
00497 return;
00498 }
00499
00500 length = GetNumAlignedResidues(align);
00501 if (length < 1 || (int) sequenceString.size() < length) {
00502 return;
00503 } else {
00504
00505 if (!pAlignedRes) {
00506 pAlignedRes = new char[length];
00507 if (!pAlignedRes) return;
00508 }
00509 }
00510
00511 if (GetFirstOrLastDenDiag(align, true, ddFirst) && GetFirstOrLastDenDiag(align, false, ddLast)) {
00512 if (ddFirst.NotEmpty() && ddLast.NotEmpty()) {
00513 start = (isMaster) ? ddFirst->GetStarts().front() : ddFirst->GetStarts().back();
00514 stop = (isMaster) ? ddLast->GetStarts().front() : ddLast->GetStarts().back();
00515 stop += ddLast->GetLen() - 1;
00516 }
00517 }
00518
00519 alignedResCtr = 0;
00520 const TDendiag* pDenDiagSet;
00521 if (GetDDSetFromSeqAlign(*align, pDenDiagSet)) {
00522
00523 if (start >=0 && start <= stop && stop < (int) sequenceString.size()) {
00524 for (int i = start; i <= stop; ++i) {
00525 if (IsPositionAligned(pDenDiagSet, i, isMaster) && alignedResCtr < length) {
00526
00527 pAlignedRes[alignedResCtr] = sequenceString[i];
00528 ++alignedResCtr;
00529 }
00530 }
00531 }
00532 }
00533
00534
00535 if (alignedResCtr != length) {
00536 delete pAlignedRes;
00537 pAlignedRes = NULL;
00538 }
00539
00540 }
00541
00542
00543
00544
00545
00546
00547
00548
00549 int GetBlockNumberForResidue(int residue, const CRef< CSeq_align >& seqAlign, bool onMaster,
00550 vector<int>* starts, vector<int>* lengths) {
00551 int i = 0;
00552 int result = -1, nBlocks;
00553 vector<int> vstarts, vlengths;
00554
00555 if (residue >= 0 && GetBlockLengths(seqAlign, vlengths) > 0 && GetBlockStarts(seqAlign, vstarts, onMaster) > 0) {
00556 if (vlengths.size() == vstarts.size()) {
00557 nBlocks = vstarts.size();
00558 while (i < nBlocks && result < 0) {
00559 if (residue >= vstarts[i] && residue < vstarts[i] + vlengths[i]) {
00560 result = i;
00561 }
00562 ++i;
00563 }
00564 if (starts != NULL) {
00565 starts->insert(starts->begin(), vstarts.begin(), vstarts.end());
00566 }
00567 if (lengths != NULL) {
00568 lengths->insert(lengths->begin(), vlengths.begin(), vlengths.end());
00569 }
00570 }
00571 }
00572 return result;
00573 }
00574
00575
00576
00577 int GetBlockCount(const CRef< CSeq_align >& seqAlign) {
00578 int nBlocks = 0;
00579 if (seqAlign.Empty()) {
00580 return nBlocks;
00581 }
00582 if (seqAlign->GetSegs().IsDendiag()) {
00583 nBlocks = seqAlign->GetSegs().GetDendiag().size();
00584 }
00585 return nBlocks;
00586 }
00587
00588
00589
00590 int GetBlockLengths(const CRef< CSeq_align >& seqAlign, vector<int>& lengths) {
00591 int count = 0;
00592 int nBlocks = GetBlockCount(seqAlign);
00593 const TDendiag* pDenDiagSet = NULL;
00594 TDendiag_cit cit;
00595
00596 if (seqAlign.NotEmpty() && nBlocks > 0) {
00597 lengths.clear();
00598 if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00599 for (cit = pDenDiagSet->begin(); cit != pDenDiagSet->end(); ++cit) {
00600 lengths.push_back((*cit)->GetLen());
00601 count++;
00602 }
00603 }
00604 }
00605 count = (count == nBlocks) ? count: 0;
00606 return count;
00607 }
00608
00609
00610
00611 int GetBlockStartsForMaster(const CRef< CSeq_align >& seqAlign, vector<int>& starts) {
00612 return GetBlockStarts(seqAlign, starts, true);
00613 }
00614
00615
00616
00617 int GetBlockStarts(const CRef< CSeq_align >& seqAlign, vector<int>& starts, bool onMaster) {
00618 int start;
00619 int count = 0;
00620 int nBlocks = GetBlockCount(seqAlign);
00621 const TDendiag* pDenDiagSet = NULL;
00622 TDendiag_cit cit;
00623
00624 if (seqAlign.NotEmpty() && nBlocks > 0) {
00625 starts.clear();
00626 if (GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00627 for (cit = pDenDiagSet->begin(); cit != pDenDiagSet->end(); ++cit) {
00628 start = (onMaster) ? (*cit)->GetStarts().front() : (*cit)->GetStarts().back();
00629 starts.push_back(start);
00630 count++;
00631 }
00632 }
00633 }
00634 count = (count == nBlocks) ? count: 0;
00635 return count;
00636 }
00637
00638 bool GetDDSetFromSeqAlign(const CSeq_align& align, const TDendiag*& dd) {
00639 if (align.GetSegs().IsDendiag()) {
00640 dd = &(align.GetSegs().GetDendiag());
00641 return true;
00642 }
00643 return false;
00644 }
00645
00646
00647 bool GetDDSetFromSeqAlign(CSeq_align& align, TDendiag*& dd) {
00648 if (align.SetSegs().IsDendiag()) {
00649 dd = &(align.SetSegs().SetDendiag());
00650 return true;
00651 }
00652 return false;
00653 }
00654
00655
00656 bool GetFirstOrLastDenDiag(const CRef< CSeq_align >& seqAlign, bool First, CRef< CDense_diag >& DenDiag) {
00657
00658
00659
00660 const TDendiag* pDenDiagSet;
00661 TDendiag_cit k;
00662
00663 if (seqAlign.NotEmpty() && GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet)) {
00664
00665 if (First) {
00666 k = pDenDiagSet->begin();
00667 }
00668 else {
00669 k = pDenDiagSet->end();
00670 k--;
00671 }
00672 DenDiag = (*k);
00673 return(true);
00674 }
00675 return(false);
00676 }
00677
00678 bool CheckSeqIdInDD(const CRef< CSeq_align >& seqAlign)
00679 {
00680 int iii;
00681 const TDendiag* pDenDiagSet;
00682 TDendiag_cit k;
00683 CDense_diag::TIds IdsSet;
00684 CDense_diag::TIds::iterator i;
00685 CRef< CSeq_id > master, slave, master2, slave2;
00686 if (seqAlign.NotEmpty() && GetDDSetFromSeqAlign(*seqAlign, pDenDiagSet))
00687 {
00688 iii=0;
00689 k = pDenDiagSet->begin();
00690 IdsSet = (*k)->GetIds();
00691 i = IdsSet.begin();
00692 master = *i;
00693 i++;
00694 slave = *i;
00695 k++;iii++;
00696 for (; k != pDenDiagSet->end(); k++, iii++)
00697 {
00698 IdsSet = (*k)->GetIds();
00699 i = IdsSet.begin();
00700 master2 = *i;
00701 i++;
00702 slave2 = *i;
00703 if (!(SeqIdsMatch(master, master2)) || !SeqIdsMatch(slave, slave2))
00704 return false;
00705 }
00706 }
00707 return true;
00708 }
00709
00710
00711
00712
00713
00714
00715
00716
00717
00718
00719 void MakeDDFromSeqLoc(CSeq_loc * pAl,TDendiag * pDD ) {
00720 int from, to;
00721
00722 if (!pAl) return;
00723
00724
00725 if (pDD && pAl->IsInt()) {
00726
00727 CSeq_interval& interval = pAl->SetInt();
00728 from=interval.SetFrom();
00729 to=interval.SetTo();
00730 CRef< CSeq_id > RefID(new CSeq_id);
00731 RefID = &interval.SetId();
00732 AddIntervalToDD(pDD,RefID,RefID,from,from,to-from+1);
00733
00734 } else if(pDD && pAl->IsPacked_int() ) {
00735 CPacked_seqint::Tdata::iterator s;
00736 for (s=pAl->SetPacked_int().Set().begin(); s!=pAl->SetPacked_int().Set().end(); s++) {
00737
00738 from=(*s)->GetFrom();
00739 to=(*s)->GetTo();
00740 CRef< CSeq_id > RefID(new CSeq_id);
00741 RefID = &((*s)->SetId());
00742 AddIntervalToDD(pDD,RefID,RefID ,from,from,to-from+1);
00743 }
00744 }
00745 }
00746
00747
00748 void MakeSeqLocFromDD(const TDendiag * pDD, CSeq_loc * pAl) {
00749 TDendiag_cit pp;
00750 int iDst;
00751 CDense_diag::TStarts::const_iterator pos;
00752 vector < CRef< CSeq_id > >::const_iterator pid;
00753
00754 for (iDst=0,pp=pDD->begin(); pp!=pDD->end(); pp++,iDst++){
00755 pos=(*pp)->GetStarts().begin();
00756 TSeqPos len=((*pp)->GetLen());
00757 TSeqPos posStart=*pos;
00758 pid=(*pp)->GetIds().begin();
00759
00760 CRef<CSeq_id> SeqID=*(pid);
00761
00762 if(pDD->size()==1){
00763 pAl->SetInt().SetFrom(posStart);
00764 pAl->SetInt().SetTo(posStart+len-1);
00765 pAl->SetInt().SetId(*SeqID);
00766 }else {
00767
00768
00769 CRef < CSeq_interval > intrvl(new CSeq_interval());
00770 intrvl->SetFrom(posStart);
00771 intrvl->SetTo(posStart+len-1);
00772 intrvl->SetId(*SeqID);
00773 pAl->SetPacked_int().Set().push_back(intrvl);
00774 }
00775 }
00776 }
00777
00778 void AddIntervalToDD(TDendiag * pDD,CRef<CSeq_id> seqID1, CRef<CSeq_id> seqID2,TSeqPos st1,TSeqPos st2, TSeqPos lll)
00779
00780 {
00781 CRef< CSeq_id > idMaster(new CSeq_id);
00782 idMaster.Reset(seqID1);
00783 CRef< CSeq_id > idSeq(new CSeq_id);
00784 idSeq.Reset(seqID2);
00785
00786 CRef<CDense_diag> newDD(new CDense_diag);
00787 newDD->SetDim(2);
00788
00789
00790 newDD->SetIds().push_back(idMaster);
00791 newDD->SetIds().push_back(idSeq);
00792 newDD->SetStarts().push_back(st1);
00793 newDD->SetStarts().push_back(st2);
00794 newDD->SetLen()=lll;
00795 pDD->push_back(newDD);
00796 }
00797
00798
00799 bool GetDenDiagSet(const CRef< CSeq_annot >& seqAnnot, int Row, const TDendiag*& pDenDiagSet) {
00800
00801
00802
00803
00804
00805
00806
00807
00808
00809 list< CRef< CSeq_align > >::const_iterator j;
00810
00811 if (seqAnnot->GetData().IsAlign()) {
00812
00813 if (Row == 0) j = seqAnnot->GetData().GetAlign().begin();
00814 else {
00815 int Count = 0;
00816 for (j= seqAnnot->GetData().GetAlign().begin();
00817 j!= seqAnnot->GetData().GetAlign().end(); j++) {
00818 if (++Count == Row) break;
00819 }
00820 }
00821 if ((*j)->GetSegs().IsDendiag()) {
00822
00823 pDenDiagSet = &((*j)->GetSegs().GetDendiag());
00824 return(true);
00825 }
00826 }
00827 return(false);
00828 }
00829
00830 bool SetDenDiagSet(CRef< CSeq_annot >& seqAnnot, int Row, TDendiag*& pDenDiagSet) {
00831
00832
00833
00834
00835 list< CRef< CSeq_align > >::iterator j;
00836
00837 if (seqAnnot->GetData().IsAlign()) {
00838
00839 if (Row == 0) j = seqAnnot->SetData().SetAlign().begin();
00840 else {
00841 int Count = 0;
00842 for (j= seqAnnot->SetData().SetAlign().begin();
00843 j!= seqAnnot->SetData().SetAlign().end(); j++) {
00844 if (++Count == Row) break;
00845 }
00846 }
00847 if ((*j)->SetSegs().IsDendiag()) {
00848
00849 pDenDiagSet = &((*j)->SetSegs().SetDendiag());
00850 return(true);
00851 }
00852 }
00853 return(false);
00854 }
00855
00856
00857 void BuildAdjacentDiags(const TDendiag_cit& begin_orig, const TDendiag_cit& end_orig, TDendiag* adj)
00858 {
00859
00860
00861
00862
00863
00864
00865 int start, len, start_adj, len_adj;
00866 int start_slave, start_adj_slave;
00867 bool appended = false;
00868
00869 CRef<CDense_diag> dd_cref;
00870 TDendiag_cit orig_ci;
00871 TDendiag_it adj_ci;
00872 CDense_diag::TStarts::iterator start_adj_i;
00873
00874
00875 for (orig_ci = begin_orig; orig_ci != end_orig; ++orig_ci) {
00876 appended = false;
00877
00878 start = (*orig_ci)->GetStarts().front();
00879 start_slave = (*orig_ci)->GetStarts().back();
00880 len = (*orig_ci)->GetLen();
00881
00882
00883
00884
00885 for (adj_ci = adj->begin(); adj_ci != adj->end(); ++adj_ci) {
00886 start_adj = (*adj_ci)->GetStarts().front();
00887 start_adj_slave = (*adj_ci)->GetStarts().back();
00888 len_adj = (*adj_ci)->GetLen();
00889
00890 if (start == start_adj + len_adj && start_slave == start_adj_slave + len_adj) {
00891
00892 (*adj_ci)->SetLen(len + len_adj);
00893 appended = true;
00894 } else if (start + len == start_adj && start_slave + len == start_adj_slave) {
00895
00896
00897 for (start_adj_i = (*adj_ci)->SetStarts().begin();
00898 start_adj_i != (*adj_ci)->SetStarts().end(); ++start_adj_i) {
00899 *start_adj_i -= len;
00900 }
00901 (*adj_ci)->SetLen(len + len_adj);
00902 appended = true;
00903 }
00904
00905 }
00906 if (!appended) {
00907 dd_cref = new CDense_diag();
00908 dd_cref->Assign(**orig_ci);
00909 adj->push_back(dd_cref);
00910 }
00911 }
00912 }
00913
00914
00915 bool EraseRow(CRef< CSeq_annot >& seqAnnot, int RowIndex) {
00916
00917
00918
00919 list< CRef< CSeq_align > >::iterator j, jend;
00920 int RowCount;
00921
00922 if (RowIndex == 0) return(false);
00923
00924 if (seqAnnot->GetData().IsAlign()) {
00925 RowCount = 1;
00926 jend = seqAnnot->SetData().SetAlign().end();
00927 for (j= seqAnnot->SetData().SetAlign().begin(); j != jend; j++) {
00928 if (RowCount == RowIndex) {
00929 seqAnnot->SetData().SetAlign().erase(j);
00930 return(true);
00931 }
00932 RowCount++;
00933 if (RowCount > RowIndex) break;
00934 }
00935 }
00936 return(false);
00937 }
00938
00939
00940 CRef< CSeq_align > ExtractFirstSeqAlign(CRef< CSeq_align > seqAlign)
00941 {
00942 if (seqAlign.Empty())
00943 return seqAlign;
00944 if (!seqAlign->GetSegs().IsDisc())
00945 return seqAlign;
00946 if (seqAlign->GetSegs().GetDisc().CanGet())
00947 {
00948 const list< CRef< CSeq_align > >& saList = seqAlign->GetSegs().GetDisc().Get();
00949 if (saList.begin() != saList.end())
00950 return ExtractFirstSeqAlign(*saList.begin());
00951 }
00952 CRef< CSeq_align > nullRef;
00953 return nullRef;
00954 }
00955
00956
00957 int ddLen(TDendiag * pDD)
00958 {
00959 TDendiag_cit pp;
00960 int staLen=0;
00961
00962 for (pp=pDD->begin(); pp!=pDD->end(); pp++)
00963 {
00964 staLen+=((*pp)->GetLen());
00965 }
00966
00967 return staLen;
00968 }
00969
00970
00971 string ddAlignInfo(TDendiag * pGuideDD)
00972 {
00973 TDendiag_cit ppGuide;
00974 int iDst;
00975 CDense_diag::TStarts::const_iterator pos;
00976 vector < CRef< CSeq_id > >::const_iterator pid;
00977 string ret="";
00978 char buf[1024];
00979
00980 for (iDst=0,ppGuide=pGuideDD->begin(); ppGuide!=pGuideDD->end(); ppGuide++,iDst++){
00981 pos=(*ppGuide)->GetStarts().begin();
00982 TSeqPos lenGuide=((*ppGuide)->GetLen());
00983 TSeqPos posMasterGuide=*pos;
00984 TSeqPos posSeqGuide=*(++pos);
00985
00986 pid=(*ppGuide)->GetIds().begin();
00987 CRef<CSeq_id> GuideMasterSeqID=*(pid);
00988 CRef<CSeq_id> GuideSeqID=*(++pid);
00989
00990 sprintf(buf,"[%s]/[%s](%d) ",GetSeqIDStr(GuideMasterSeqID).c_str(),GetSeqIDStr(GuideSeqID).c_str(),(int)pGuideDD->size());
00991 if(!iDst){
00992 ret+=buf;
00993 }
00994 sprintf(buf,"#%d=[%d-%d]/[%d-%d](%d) ",iDst,posMasterGuide+1,posMasterGuide+lenGuide,posSeqGuide+1,posSeqGuide+lenGuide,lenGuide);
00995 ret+=buf;
00996 }
00997 return ret;
00998 }
00999
01000 int ddRecompose(TDendiag * pGuideDD,int iMaster, int iSeq,TDendiag * pResultDD)
01001 {
01002 TDendiag_it ppGuide;
01003 int iDst;
01004 CDense_diag::TStarts::iterator pos,ppos;
01005 vector < CRef< CSeq_id > >::iterator pid,ppid;
01006
01007 for (iDst=0,ppGuide=pGuideDD->begin(); ppGuide!=pGuideDD->end(); ppGuide++,iDst++){
01008 ppos=pos=(*ppGuide)->SetStarts().begin();
01009 TSeqPos lenGuide=((*ppGuide)->GetLen());
01010 TSeqPos posMasterGuide=*pos;
01011 TSeqPos posSeqGuide=*(++pos);
01012
01013
01014
01015
01016 ppid=pid=(*ppGuide)->SetIds().begin();
01017 CRef<CSeq_id> GuideMasterSeqID=*(pid);
01018 CRef<CSeq_id> GuideSeqID=*(++pid);
01019
01020
01021
01022
01023 AddIntervalToDD(pResultDD,iMaster==0 ? GuideMasterSeqID : GuideSeqID , iSeq==0 ? GuideMasterSeqID : GuideSeqID ,iMaster==0 ? posMasterGuide : posSeqGuide ,iSeq==0 ? posMasterGuide : posSeqGuide , lenGuide);
01024 }
01025 return iDst;
01026 }
01027
01028 int ddRenameSeqID(TDendiag * pGuideDD,int iNum, CRef< CSeq_id > & seqID)
01029 {
01030 TDendiag_it ppGuide;
01031 int iDst;
01032 vector < CRef< CSeq_id > >::iterator pid;
01033
01034 for (iDst=0,ppGuide=pGuideDD->begin(); ppGuide!=pGuideDD->end(); ppGuide++,iDst++){
01035
01036 CRef< CSeq_id > idCopy(new CSeq_id);
01037 idCopy.Reset(seqID);
01038
01039 pid=(*ppGuide)->SetIds().begin();
01040 if(iNum)++pid;
01041 *(pid)=idCopy;
01042 }
01043 return iDst;
01044 }
01045
01046
01047 bool ddAreEquivalent(const TDendiag * pDD1, const TDendiag * pDD2, bool checkMasters)
01048 {
01049 TDendiag_cit pp1,pp2;
01050 CDense_diag::TStarts::const_iterator pos1,pos2;
01051 vector < CRef< CSeq_id > >::const_iterator pid1,pid2;
01052 bool isSimilar=true;
01053
01054 if(pDD1->size()!=pDD2->size())
01055 return false;
01056
01057 for (pp1=pDD1->begin(),pp2=pDD2->begin(); pp1!=pDD1->end() && pp2!=pDD2->end(); pp1++,pp2++){
01058 pos1=(*pp1)->GetStarts().begin();
01059 TSeqPos lenGuide1=((*pp1)->GetLen());
01060 TSeqPos posMasterGuide1=*pos1;
01061 TSeqPos posSeqGuide1=*(++pos1);
01062 pid1=(*pp1)->GetIds().begin();
01063 CRef<CSeq_id> idMas1=*(pid1);
01064 CRef<CSeq_id> idSlv1=*(++pid1);
01065
01066 pos2=(*pp2)->GetStarts().begin();
01067 TSeqPos lenGuide2=((*pp2)->GetLen());
01068 TSeqPos posMasterGuide2=*pos2;
01069 TSeqPos posSeqGuide2=*(++pos2);
01070 pid2=(*pp2)->GetIds().begin();
01071 CRef<CSeq_id> idMas2=*(pid2);
01072 CRef<CSeq_id> idSlv2=*(++pid2);
01073
01074
01075 if( !SeqIdsMatch(idSlv1, idSlv2) ||
01076 lenGuide1!=lenGuide2 ||
01077 posMasterGuide1!=posMasterGuide2 ||
01078 posSeqGuide1!=posSeqGuide2){
01079 isSimilar=false;
01080 break;
01081 }
01082
01083 if (checkMasters && !SeqIdsMatch(idMas1, idMas2)) {
01084 isSimilar=false;
01085 break;
01086 }
01087 }
01088 return isSimilar;
01089 }
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103 typedef struct {
01104 int hits;
01105 int crdSeq[4];
01106 CRef< CSeq_id > seqID[4];
01107 int secNum[4];
01108 }ALICORD;
01109
01110 static int ddAcumAliCord(TDendiag * pDD, int interRow, ALICORD * acL,int seqRow)
01111 {
01112 TDendiag_cit pp;
01113 vector < CRef< CSeq_id > >::const_iterator pID;
01114 int maxPos=0,i,iSec;
01115 CDense_diag::TStarts::const_iterator pos;
01116
01117
01118 for (iSec=0,pp=pDD->begin(); pp!=pDD->end(); pp++,iSec++)
01119 {
01120 pos=(*pp)->GetStarts().begin();
01121 TSeqPos len=((*pp)->GetLen());
01122 TSeqPos posSeq=*(pos);
01123 TSeqPos posInter=*(++pos);
01124
01125 pID=(*(pDD->begin()))->GetIds().begin();
01126 CRef< CSeq_id > idSeq=*(pID);
01127 CRef< CSeq_id > idInter=*(++pID);
01128
01129 if(interRow==0){
01130 TSeqPos tmp=posSeq;posSeq=posInter;posInter=tmp;
01131 CRef< CSeq_id > tmi=idSeq;idSeq=idInter;idInter=tmi;
01132 }
01133
01134
01135 if(!acL){
01136 if(maxPos < (int) (posInter+len) )maxPos = posInter+len;
01137 }
01138 else {
01139 for(i=posInter;i< (int) (posInter+len);i++){
01140 acL[i].hits++;
01141 acL[i].crdSeq[0]=i;
01142 acL[i].crdSeq[seqRow]=posSeq+(i-posInter);
01143 acL[i].seqID[0]=idInter;
01144 acL[i].seqID[seqRow]=idSeq;
01145 acL[i].secNum[seqRow]=iSec;
01146 acL[i].secNum[0]=iSec;
01147 }
01148
01149 }
01150 }
01151
01152 return maxPos;
01153 }
01154
01155
01156 static int ddScanAliCord(TDendiag * pDDList, ALICORD * acL, int maxLen,int rowMaster,int rowSeq,int iRowFollowStructure,int hitCnt)
01157 {
01158
01159 int iCnt=0,i,is,ie;
01160
01161 for(i=0;i<maxLen;)
01162 {
01163
01164 if(acL[i].hits!=hitCnt){
01165
01166 if(acL[i].hits>3)return 0;
01167 i++;continue;
01168 }
01169 is=i;
01170
01171
01172 while(acL[i].hits==hitCnt){
01173 if(iRowFollowStructure!=-1){
01174 if(acL[i].secNum[iRowFollowStructure]!=acL[is].secNum[iRowFollowStructure])
01175 break;
01176 } else {
01177 if( acL[i].secNum[rowMaster]!=acL[is].secNum[rowMaster] ||
01178 acL[i].secNum[rowSeq]!=acL[is].secNum[rowSeq] )
01179 break;
01180 }
01181
01182 if(i>is && acL[i].crdSeq[rowSeq]!=acL[i-1].crdSeq[rowSeq]+1)
01183 break;
01184 i++;
01185 }
01186 ie=i;
01187
01188 TSeqPos posMasterNew=acL[is].crdSeq[rowMaster];
01189 TSeqPos posSeqNew=acL[is].crdSeq[rowSeq];
01190 TSeqPos lenNew=ie-is;
01191
01192
01193 CRef< CSeq_id > idMaster(new CSeq_id);
01194 idMaster.Reset(acL[is].seqID[rowMaster]);
01195 CRef< CSeq_id > idSeq(new CSeq_id);
01196 idSeq.Reset(acL[is].seqID[rowSeq]);
01197
01198
01199
01200 {
01201 CRef<CDense_diag> newDD(new CDense_diag);
01202 newDD->SetDim(2);
01203 newDD->SetIds().push_back(idMaster);
01204 newDD->SetIds().push_back(idSeq);
01205 newDD->SetStarts().push_back(posMasterNew);
01206 newDD->SetStarts().push_back(posSeqNew);
01207 newDD->SetLen()=lenNew;
01208
01209 pDDList->push_back(newDD);
01210 iCnt++;
01211 }
01212
01213 }
01214 return iCnt;
01215 }
01216
01217 int ddRemap(TDendiag * pSrcDD,int iSeq,TDendiag * pGuideDD, int iMaster,TDendiag * newDDlist,int iMasterNew, int iSeqNew,int flags,string err)
01218 {
01219
01220 int maxLen2=ddAcumAliCord(pGuideDD,1-iMaster,0,1);
01221 int maxLen1=ddAcumAliCord(pSrcDD,1-iSeq,0,2);
01222 int maxLen=maxLen1>maxLen2 ? maxLen1 : maxLen2;maxLen++;
01223 int iFollow=-1;
01224
01225
01226 ALICORD* allArr=(ALICORD * )malloc(sizeof(ALICORD)* maxLen);
01227 if(!allArr) {
01228 err="remapDD error: couldn't allocate enough memory.";
01229 return 0;
01230 }
01231
01232 memset(allArr,0,maxLen*sizeof(ALICORD));
01233
01234
01235
01236
01237
01238
01239 if(flags&DD_FOLLOWGUIDE){
01240 ddAcumAliCord(pSrcDD,1-iSeq,allArr,2);
01241 ddAcumAliCord(pGuideDD,1-iMaster,allArr,1);
01242 iFollow=0;
01243 }else {
01244 ddAcumAliCord(pGuideDD,1-iMaster,allArr,1);
01245 ddAcumAliCord(pSrcDD,1-iSeq,allArr,2);
01246 iFollow=0;
01247 }
01248
01249 int iCnt=ddScanAliCord(newDDlist,allArr,maxLen,iMasterNew,iSeqNew,iFollow,2);
01250
01251 free( (void * )allArr) ;
01252 return iCnt;
01253 }
01254
01255 string ddDifferenceResidues(TDendiag * pSrcDD,TDendiag * pGuideDD,TDendiag * newDDlist)
01256 {
01257 TDendiag DifferenceDD;
01258 if(!newDDlist)newDDlist=&DifferenceDD;
01259
01260 int maxLen2=ddAcumAliCord(pGuideDD,0,0,1);
01261 int maxLen1=ddAcumAliCord(pSrcDD,0,0,2);
01262 int maxLen=maxLen1>maxLen2 ? maxLen1 : maxLen2;maxLen++;
01263
01264 ALICORD * allArr=(ALICORD * )malloc(sizeof(ALICORD)* maxLen);if(!allArr)return NULL;
01265 memset(allArr,0,maxLen*sizeof(ALICORD));
01266
01267 ddAcumAliCord(pSrcDD,0,allArr,2);
01268 ddAcumAliCord(pGuideDD,0,allArr,1);
01269
01270
01271 ddScanAliCord(newDDlist,allArr,maxLen,0,0,-1,1);
01272
01273 free( (void * )allArr) ;
01274 return ddAlignInfo(newDDlist);
01275 }
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287 #define scanTill( v_cond ) while(*ptr && *ptr!='\n' && (v_cond) )
01288 bool sscanSeqId (const char * & ptr,CSeq_id & seqid)
01289 {
01290 char typ[1024], id[1024];
01291
01292 int ityp=0,iid=0;
01293 typ[ityp]=0;id[iid]=0;
01294
01295
01296 scanTill( *ptr==' ')ptr++;
01297 scanTill( *ptr!=' '){
01298 typ[ityp++]=*ptr;
01299 ptr++;
01300 }typ[ityp]=0;
01301 scanTill( *ptr==' ')ptr++;
01302 scanTill( *ptr!=' '){
01303 id[iid++]=*ptr;
01304 ptr++;
01305 }id[iid]=0;
01306 int gi;
01307 if( !strcmp(typ,"gi") && sscanf(id,"%d",&gi)==1 ){
01308 seqid.SetGi(gi);
01309 }
01310 else if( !strcmp(typ,"pdb") ){
01311 char * ss=strrchr(id,'_');
01312
01313 if( ss){
01314 *ss=0;
01315 ss++;
01316 seqid.SetPdb().SetChain(*ss);
01317 }
01318 seqid.SetPdb().SetMol().Set(id);
01319 }
01320 else return false;
01321 return true;
01322 }
01323
01324 const char * sscanSeqLocIntervals(const char * ptr, CSeq_loc & sq)
01325 {
01326 CSeq_id * sid=new CSeq_id();
01327 int howmany,from,to;
01328
01329
01330
01331 sscanSeqId (ptr,*sid);
01332
01333
01334 scanTill(true){
01335 scanTill( *ptr==' ')ptr++;
01336 if(!(howmany=sscanf(ptr,"%d-%d",&from,&to)))
01337 break;
01338
01339 if (howmany==1)to=from+1;
01340
01341 CRef < CSeq_interval > intrvl(new CSeq_interval());
01342 intrvl->SetFrom(from);
01343 intrvl->SetTo(to);
01344 intrvl->SetId(*sid);
01345 sq.SetPacked_int().Set().push_back(intrvl);
01346 scanTill( *ptr!=' ')ptr++;
01347 }
01348
01349
01350
01351
01352
01353 ptr=strchr(ptr,'\n');
01354 if(ptr)ptr++;
01355
01356 return ptr;
01357 }
01358
01359
01360
01361
01362
01363 void ExtractScoreFromSeqAlign(const CRef< CSeq_align >& seqAlign, int flags, vector<double>& scores) {
01364 ExtractScoreFromSeqAlign(seqAlign.GetPointer(), flags, scores);
01365 }
01366
01367 void ExtractScoreFromSeqAlign(const CSeq_align* seqAlign, int flags, vector<double>& scores) {
01368
01369 int count=0;
01370 TDendiag_cit ddit;
01371
01372 scores[0] = E_VAL_NOT_FOUND;
01373 scores[1] = SCORE_NOT_FOUND;
01374 scores[2] = SCORE_NOT_FOUND;
01375 scores[3] = SCORE_NOT_FOUND;
01376
01377 if (!seqAlign) {
01378 return;
01379 }
01380
01381 if (seqAlign->IsSetScore()) {
01382
01383 count = ExtractScoreFromScoreList(seqAlign->GetScore(), flags, scores);
01384 } else {
01385
01386
01387
01388 if (seqAlign->GetSegs().IsDendiag()) {
01389 const TDendiag ddList = seqAlign->GetSegs().GetDendiag();
01390 if (ddList.size() > 0) {
01391 ddit = ddList.begin();
01392 while (ddit != ddList.end() && count == 0) {
01393 if ((*ddit)->IsSetScores()) {
01394 count = ExtractScoreFromScoreList((*ddit)->GetScores(), flags, scores);
01395 }
01396 ++ddit;
01397 }
01398 }
01399
01400
01401
01402 } else if (seqAlign->GetSegs().IsDenseg()) {
01403 TDendiag ddList;
01404 Denseg2DenseDiagList(seqAlign->GetSegs().GetDenseg(), ddList);
01405 if (ddList.size() > 0) {
01406 ddit = ddList.begin();
01407 while (ddit != ddList.end() && count == 0) {
01408 if ((*ddit)->IsSetScores()) {
01409 count = ExtractScoreFromScoreList((*ddit)->GetScores(), flags, scores);
01410 }
01411 ++ddit;
01412 }
01413 }
01414 }
01415
01416 }
01417 }
01418
01419
01420 int ExtractScoreFromScoreList(const CSeq_align::TScore& scores, int flags, vector<double>& values) {
01421
01422 int count = 0;
01423
01424
01425 CSeq_align::TScore::const_iterator score_ci, score_ci_end = scores.end();
01426 for (score_ci=scores.begin(); score_ci!=score_ci_end; score_ci++) {
01427 if ((*score_ci)->IsSetId() && (*score_ci)->GetId().IsStr()) {
01428 if ((flags&E_VALUE) && (*score_ci)->GetValue().IsReal() && (*score_ci)->GetId().GetStr() == "e_value") {
01429 values[0] = (*score_ci)->GetValue().GetReal();
01430 count++;
01431 }
01432 if ((flags&RAW_SCORE) && (*score_ci)->GetValue().IsInt() && (*score_ci)->GetId().GetStr() == "score") {
01433 values[1] = (*score_ci)->GetValue().GetInt();
01434 count++;
01435 }
01436 if ((flags&BIT_SCORE) && (*score_ci)->GetValue().IsReal() && (*score_ci)->GetId().GetStr() == "bit_score") {
01437 values[2] = (*score_ci)->GetValue().GetReal();
01438 count++;
01439 }
01440 if ((flags&N_IDENTICAL) && (*score_ci)->GetValue().IsInt() && (*score_ci)->GetId().GetStr() == "num_ident") {
01441 values[3] = (*score_ci)->GetValue().GetInt();
01442 count++;
01443 }
01444 }
01445 }
01446 return count;
01447 }
01448
01449
01450
01451
01452
01453 CRef<CSeq_align> Denseg2DenseDiagList(const CRef<CSeq_align>& denseSegSeqAlign)
01454 {
01455 CRef<CSeq_align> newSa(new CSeq_align);
01456 newSa->Assign(*denseSegSeqAlign);
01457
01458 if (denseSegSeqAlign.NotEmpty() && denseSegSeqAlign->GetSegs().IsDenseg()) {
01459 TDendiag ddList;
01460 Denseg2DenseDiagList(denseSegSeqAlign->GetSegs().GetDenseg(), ddList);
01461 newSa->SetSegs().SetDendiag() = ddList;
01462 }
01463
01464 return newSa;
01465 }
01466
01467
01468
01469
01470
01471 void Denseg2DenseDiagList(const CDense_seg& ds, TDendiag& ddl)
01472 {
01473 const CDense_seg::TIds& ids = ds.GetIds();
01474 const CDense_seg::TStarts& starts = ds.GetStarts();
01475 const CDense_seg::TStrands& strands = ds.GetStrands();
01476 const CDense_seg::TLens& lens = ds.GetLens();
01477 const CDense_seg::TScores& scores = ds.GetScores();
01478 const CDense_seg::TNumseg& numsegs = ds.GetNumseg();
01479 const CDense_seg::TDim& numrows = ds.GetDim();
01480 int total = numrows * numsegs;
01481 int pos = 0;
01482
01483 int rows_per_seg;
01484
01485 bool strands_exist = ((int) strands.size() == total);
01486 bool scores_exist = ((int) scores.size() == total);
01487
01488 for (CDense_seg::TNumseg seg = 0; seg < numsegs; seg++) {
01489 rows_per_seg = 0;
01490 CRef<CDense_diag> dd (new CDense_diag);
01491 dd->SetLen(lens[seg]);
01492 for (CDense_seg::TDim row = 0; row < numrows; row++) {
01493 const TSignedSeqPos& start = starts[pos];
01494 if (start >=0) {
01495 rows_per_seg++;
01496 dd->SetIds().push_back(ids[row]);
01497 dd->SetStarts().push_back(start);
01498 if (strands_exist) {
01499 dd->SetStrands().push_back(strands[pos]);
01500 }
01501 if (scores_exist) {
01502 dd->SetScores().push_back(scores[pos]);
01503 }
01504 }
01505 pos++;
01506 }
01507 if (rows_per_seg >= 2) {
01508 dd->SetDim(rows_per_seg);
01509 ddl.push_back(dd);
01510 }
01511 }
01512 }
01513
01514 bool GetPendingSeqId(CCdCore * pCD,int irow,CRef <CSeq_id> & seqID)
01515 {
01516 int i ;
01517 list <CRef <CUpdate_align> > ::iterator pPen;
01518 for(i=0,pPen=pCD->SetPending().begin();pPen!=pCD->SetPending().end();pPen++,i++){
01519 if(i<irow)
01520 continue;
01521 CSeq_align * pAl = *((*pPen)->SetSeqannot().SetData().SetAlign().begin());
01522 CDense_diag * pDDPen=*(pAl->SetSegs().SetDendiag().begin());
01523 vector < CRef< CSeq_id > >::const_iterator pid=pDDPen->GetIds().begin();
01524 seqID=*(++pid);
01525 return true;
01526 }
01527 return false;
01528 }
01529
01530 bool GetPendingFootPrint(CCdCore * pCD,int irow,int * from, int * to)
01531 {
01532 int i ;
01533 list <CRef <CUpdate_align> > ::iterator pPen;
01534 TDendiag_cit pD ;
01535 CDense_diag::TStarts::const_iterator pid;
01536 CRef<CDense_diag > pDDPen;
01537 CSeq_align * pAl ;
01538
01539 for(i=0,pPen=pCD->SetPending().begin();pPen!=pCD->SetPending().end();pPen++,i++){
01540 if(i<irow)
01541 continue;
01542 pAl = *((*pPen)->SetSeqannot().SetData().SetAlign().begin());
01543 pD= pAl->SetSegs().SetDendiag().begin();
01544 pDDPen=*(pD);
01545 pid=pDDPen->GetStarts().begin();
01546 *from=*(++pid);
01547
01548 pD= pAl->SetSegs().SetDendiag().end();pD--;
01549 pDDPen=*(pD);
01550 pid=pDDPen->GetStarts().begin();
01551 (*to)=*(++pid);
01552 (*to)+=pDDPen->GetLen()-1;
01553 return true;
01554 }
01555 return false;
01556 }
01557 bool GetPendingDD(CCdCore * pCD,int irow,TDendiag* & pDenDiagSet)
01558 {
01559 int i ;
01560 list <CRef <CUpdate_align> > ::iterator pPen;
01561 for(i=0,pPen=pCD->SetPending().begin();pPen!=pCD->SetPending().end();pPen++,i++){
01562 if(i<irow)
01563 continue;
01564 CSeq_align * pAl = *((*pPen)->SetSeqannot().SetData().SetAlign().begin());
01565 pDenDiagSet=&(pAl->SetSegs().SetDendiag());
01566
01567
01568 return true;
01569 }
01570 return false;
01571 }
01572
01573
01574
01575
01576 int GetPssmIdFromSeqAlign(const CRef<CSeq_align >& seqAlign, string& err) {
01577
01578 int pssmId = 0;
01579
01580 err.erase();
01581 if (seqAlign.Empty()) {
01582 err = "GetPssmIdFromSeqAlign: Empty Seq_align.\n";
01583 } else if (seqAlign->IsSetDim() && seqAlign->GetDim() != 2) {
01584 err = "GetPssmIdFromSeqAlign: Only Seq_aligns with dim = 2 supported.\n";
01585 } else if (seqAlign->GetSegs().IsDenseg()) {
01586 const CRef< CSeq_id >& pssmSeqId = seqAlign->GetSegs().GetDenseg().GetIds().back();
01587 pssmId = GetCDDPssmIdFromSeqId(pssmSeqId);
01588 } else if (seqAlign->GetSegs().IsDendiag()) {
01589 err = "GetPssmIdFromSeqAlign: Dense_diags not currently supported.\n";
01590 } else {
01591 err.append("GetPssmIdFromSeqAlign: Seq_align is an unsupported type (%d).\n", seqAlign->GetType());
01592 }
01593 return pssmId;
01594 }
01595
01596
01597
01598 int GetMasterGIFromSeqAlign(const CRef< CSeq_align >& seqAlign, string& err) {
01599
01600 int gi = 0;
01601
01602 err.erase();
01603 if (seqAlign.Empty()) {
01604 err = "GetMasterGIFromSeqAlign: Empty Seq_align.\n";
01605 } else if (seqAlign->GetSegs().IsDenseg()) {
01606 const CRef< CSeq_id >& seqId = seqAlign->GetSegs().GetDenseg().GetIds().front();
01607 if (seqId.NotEmpty() && seqId->IsGi()) {
01608 gi = seqId->GetGi();
01609 } else {
01610 err = "GetMasterGIFromSeqAlign: Dense_seg's master sequence is empty or not of type 'GI'.\n";
01611 }
01612 } else if (seqAlign->GetSegs().IsDendiag()) {
01613 const CRef< CSeq_id >& seqId = seqAlign->GetSegs().GetDendiag().front()->GetIds().front();
01614 if (seqId.NotEmpty() && seqId->IsGi()) {
01615 gi = seqId->GetGi();
01616 } else {
01617 err = "GetMasterGIFromSeqAlign: Dense_diag's master sequence is empty or not of type 'GI'.\n";
01618 }
01619 } else {
01620 err.append("GetMasterGIFromSeqAlign: Seq_align is an unsupported type (%d).\n", seqAlign->GetType());
01621 }
01622 return gi;
01623 }
01624
01625
01626 END_SCOPE(cd_utils)
01627 END_NCBI_SCOPE
01628
01629