NCBI C++ ToolKit
cuCdCore.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuCdCore.cpp 89947 2020-04-30 13:05:44Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Adapted from CDTree-1 code by Chris Lanczycki
27  *
28  * File Description:
29  *
30  * Subclass of CCdd object.
31  * Originally forked from the CDTree3 CCd class.
32  *
33  * ===========================================================================
34  */
35 
36 
37 #include <ncbi_pch.hpp>
38 
39 #include <algorithm>
41 #include <objects/pub/Pub.hpp>
48 
49 #include <stdio.h>
50 
53 BEGIN_SCOPE(cd_utils)
54 
55 
56 //const int CCdCore::INVALID_MAPPED_POSITION = INVALID_POSITION;
57 
58 //const ncbi::ENull CCdCore::NULL_CREF = ncbi::null;
61 
62 
63  // destructor
64 CCdCore::~CCdCore(void) {
65 }
66 
67 // constructor
69 {
70 }
71 
72 /* ======================= */
73 /* CD identifier methods */
74 /* ======================= */
75 
76 string CCdCore::GetAccession() const {
77  int Dummy;
78  return(GetAccession(Dummy));
79 }
80 
81 string CCdCore::GetAccession(int& Version) const {
82 //-------------------------------------------------------------------------
83 // get accession name of CD
84 //-------------------------------------------------------------------------
85  CCdd_id_set::Tdata::const_iterator i;
86  string Str;
87 
88  for (i=GetId().Get().begin(); i!=GetId().Get().end(); i++) {
89  if ((*i)->IsGid()) {
90  if ((*i)->GetGid().IsSetVersion()) {
91  Version = (*i)->GetGid().GetVersion();
92  }
93  else {
94  Version = 1;
95  }
96  return((*i)->GetGid().GetAccession());
97  }
98  }
99  return(Str);
100 }
101 
102 void CCdCore::SetAccession(string Accession) {
103  SetAccession(Accession, 1);
104 }
105 
106 void CCdCore::SetAccession(string Accession, int Version) {
107 //-------------------------------------------------------------------------
108 // set accession name of CD
109 //-------------------------------------------------------------------------
110  bool hasGid = false;
111  CCdd_id_set::Tdata::iterator i;
112 
113  for (i=SetId().Set().begin(); i!=SetId().Set().end(); i++) {
114  if ((*i)->IsGid()) {
115  (*i)->SetGid().SetAccession(Accession);
116  (*i)->SetGid().SetVersion(Version);
117  hasGid = true;
118  }
119  }
120 
121  // If there was no Gid (or SetId().Set() is empty), create and add one.
122  if (!hasGid) {
123  CRef< CCdd_id > cdId(new CCdd_id());
124  CRef< CGlobal_id > global(new CGlobal_id());
125  global->SetAccession(Accession);
126  global->SetVersion(Version);
127  cdId->SetGid(*global);
128  SetId().Set().push_back(cdId);
129  }
130 }
131 
133 //-------------------------------------------------------------------------
134 // erase CD's uid
135 //-------------------------------------------------------------------------
136  CCdd_id_set::Tdata::iterator i;
137 
138  for (i=SetId().Set().begin(); i!=SetId().Set().end(); i++) {
139  if ((*i)->IsUid()) {
140  SetId().Set().erase(i);
141  return;
142  }
143  }
144 }
145 
146 int CCdCore::GetUID() const
147 {
148  int uid = 0;
149  const CCdd_id_set::Tdata ids = GetId().Get();
150  CCdd_id_set::Tdata::const_iterator idCit = ids.begin(), idEnd = ids.end();
151  for (; idCit != idEnd; ++idCit) {
152  if ((*idCit)->IsUid()) {
153  uid = (*idCit)->GetUid();
154  break;
155  }
156  }
157  return uid;
158 }
159 
160 
161 bool CCdCore::HasCddId(const CCdd_id& id) const{
162 
163  bool isHere = false;
164  CCdd_id_set::Tdata idSet = GetId().Get();
165  for (CCdd_id_set::Tdata::const_iterator ci=idSet.begin(); ci!=idSet.end(); ++ci) {
166  if ((**ci).Equals(id)) {
167  isHere = true;
168  break;
169  }
170  }
171  return isHere;
172 }
173 
174 
175 /* ============================ */
176 /* Basic information about CD */
177 /* ============================ */
178 
180 //-------------------------------------------------------------------------
181 // get descriptive comment about CD
182 //-------------------------------------------------------------------------
183  CCdd_descr_set::Tdata::const_iterator i;
184  string Str;
185 
186  if (IsSetDescription()) {
187  for (i=GetDescription().Get().begin(); i!=GetDescription().Get().end(); i++) {
188  if ((*i)->IsComment()) {
189  return((*i)->GetComment());
190  }
191  }
192  }
193  return(Str);
194 }
195 
197 //-------------------------------------------------------------------------
198 // get string indicating date of last change to CD
199 //-------------------------------------------------------------------------
200  CCdd_descr_set::Tdata::const_iterator i;
201  string Str;
202 
203  if (IsSetDescription()) {
204  for (i=GetDescription().Get().begin(); i!=GetDescription().Get().end(); i++) {
205  if ((*i)->IsUpdate_date()) {
206  (*i)->GetUpdate_date().GetDate(&Str);
207  return(Str);
208  }
209  }
210  }
211  return(Str);
212 }
213 
214 
215 int CCdCore::GetNumRows() const {
216 //-------------------------------------------------------------------------
217 // get number of rows in CD
218 //-------------------------------------------------------------------------
219 
220  const CRef< CSeq_annot >& alignment = GetAlignment();
221 
222  // Be sure GetAlign() is not an empty container -> no such thing as a CD blob with one row.
223  if (alignment.NotEmpty() && alignment->GetData().IsAlign() && alignment->GetData().GetAlign().size() > 0) {
224  // number pairs + 1 == num rows
225  return(alignment->GetData().GetAlign().size()+1);
226  }
227  return(0);
228 }
229 
230 // number of rows of alignment with valid sequence indices
232 
233  int count = 0, seqIndex = -1;
234  int nrows = GetNumRows();
235  for (int i = 0; i < nrows; ++i) {
236  seqIndex = GetSeqIndexForRowIndex(i);
237  if (seqIndex >= 0) {
238  ++count;
239  }
240  }
241  return count;
242 }
243 
245 //-------------------------------------------------------------------------
246 // get number of sequences in CD
247 //-------------------------------------------------------------------------
248  if (IsSetSequences()) {
249  if (GetSequences().IsSet()) {
250  return(GetSequences().GetSet().GetSeq_set().size());
251  }
252  }
253  return(0);
254 }
255 
256 
258 //-------------------------------------------------------------------------
259 // get total number of aligned residues
260 // TDendiag = list< CRef< CDense_diag > >
261 //-------------------------------------------------------------------------
262  const CRef< CSeq_align >& seqAlign = GetSeqAlign(0);
263  if (seqAlign.NotEmpty()) {
264  return GetNumAlignedResidues(seqAlign);
265  }
266  return 0;
267 
268 }
269 
270 
272 //-------------------------------------------------------------------------
273 // get number of residues in the master sequence, from the first
274 // aligned residue to the last aligned residue
275 //-------------------------------------------------------------------------
276  return(GetUpperBound(0) - GetLowerBound(0) + 1);
277 }
278 
279 
280 // return number of blocks in alignment (0 if no alignment, or not a Dense_diag)
282  if (IsSeqAligns()) {
283  const CRef< CSeq_align >& seqAlign = GetSeqAlign(0);
284  if (seqAlign.NotEmpty()) {
285  return GetBlockCount(seqAlign);
286  }
287  }
288  return 0;
289 }
290 
291 bool CCdCore::GetCDBlockLengths(vector<int>& lengths) const {
292  if (IsSeqAligns()) {
293  const CRef< CSeq_align >& seqAlign = GetSeqAlign(0);
294  if (seqAlign.NotEmpty()) {
295  return (GetBlockLengths(seqAlign, lengths) != 0);
296  }
297  }
298  return false;
299 }
300 
301 bool CCdCore::GetBlockStartsForRow(int rowIndex, vector<int>& starts) const {
302  bool onMaster = (rowIndex) ? false : true;
303  bool result = false;
304  if (IsSeqAligns() && rowIndex >= 0) {
305  const CRef< CSeq_align >& seqAlign = GetSeqAlign(rowIndex);
306  if (seqAlign.NotEmpty()) {
307  result = (GetBlockStarts(seqAlign, starts, onMaster) != 0);
308  sort(starts.begin(), starts.end());
309  }
310  }
311  return result;
312 }
313 
314 
315 /* ============================================ */
316 /* Find/convert sequence list and row indices */
317 /* ============================================ */
318 
319 int CCdCore::GetSeqIndexForRowIndex(int rowIndex) const {
320  int seqIndex = -1;
321  CRef< CSeq_id > seqId;
322 
323  if (rowIndex < 0 || rowIndex > GetNumRows()) {
324  return seqIndex;
325  }
326 
327  if (GetSeqIDFromAlignment(rowIndex, seqId)) {
328  seqIndex = GetSeqIndex(seqId);
329  }
330  return seqIndex;
331 }
332 
334  return GetSeqIndexForRowIndex(0);
335 }
336 
337 int CCdCore::GetSeqIndex(const CRef< CSeq_id >& SeqID) const{
338 //-------------------------------------------------------------------------
339 // get the sequence index with given SeqID
340 //-------------------------------------------------------------------------
341 
342  // Sanity check that the ASN.1 has the assumed format.
343  if (!IsSetSequences() || !GetSequences().IsSet() || GetSequences().GetSet().GetSeq_set().size() == 0) {
344  return(-1);
345  }
346 
347  int i, NumSequences = GetNumSequences();
348  CRef< CSeq_id > TrialSeqID;
349 
350  CBioseq_set::TSeq_set::const_iterator seCit = GetSequences().GetSet().GetSeq_set().begin();
351  CBioseq_set::TSeq_set::const_iterator seCend = GetSequences().GetSet().GetSeq_set().end();
352 
353  if (SeqID.NotEmpty()) {
354  for (i=0; seCit != seCend && i<NumSequences; ++seCit, i++) {
355  // Stopped using GetSeqIDForIndex (or variants) as it selected only one of the possible Seq_ids
356  // -- potentially not the desired one for a given use case.
357  if ((*seCit)->IsSeq() && SeqIdHasMatchInBioseq(SeqID, (*seCit)->GetSeq())) {
358  return i;
359  }
360  }
361  }
362  return(-1);
363 }
364 
366 //-------------------------------------------------------------------------
367 // get the row index for the Nth match of ID.
368 // N is 1-based.
369 // return -1 if no match is found.
370 //-------------------------------------------------------------------------
371  int k, Count, NumRows=GetNumRows();
372  CRef< CSeq_id > TestID;
373 
374  Count = 0;
375  for (k=0; k<NumRows; k++) {
376  GetSeqIDFromAlignment(k, TestID);
377  if (SeqIdsMatch(ID, TestID)) {
378  Count++;
379  if (Count == N) {
380  return(k);
381  }
382  }
383  }
384  return(-1);
385 }
386 
387 // convenience method to return a vector vs. a list
388 // find all row indices for a seqID, irrespective of the footprint (return # found)
389 int CCdCore::GetAllRowIndicesForSeqId(const CRef<CSeq_id>& SeqID, vector<int>& rows) const
390 {
391  int numMatches = 0;
392  list<int> lint;
393  list<int>::iterator lintIt;
394 
395  rows.clear();
396  numMatches = GetAllRowIndicesForSeqId(SeqID, lint);
397  if (numMatches > 0) {
398  for (lintIt = lint.begin(); lintIt != lint.end(); ++lintIt) {
399  rows.push_back(*lintIt);
400  }
401  }
402  return numMatches;
403 }
404 
405 // find all row indices for a seqID, irrespective of the footprint (return # found)
406 int CCdCore::GetAllRowIndicesForSeqId(const CRef<CSeq_id>& SeqID, list<int>& rows) const
407 {
408  // List size is returned.
409  // Place all row indices for the seq_id into the list 'rows'. Use a new or
410  // cleared list; do not pass a list with other data as this function clears it!!
411  // If Seq_id is not found, return empty list.
412 
413  CRef<CSeq_id> testID;
414  CRef<CSeq_id> findID = SeqID;
415  int i, nrow = GetNumRows();
416 
417  rows.clear();
418  for (i=0; i<nrow; i++) {
419  if (GetSeqIDFromAlignment(i, testID)) { // tests only for gi or pdb IDs
420  if (findID->Match(*testID)) { // match
421  rows.push_back(i);
422  }
423  }
424  }
425  return rows.size();
426 }
427 
428 
429 /* ========================================== */
430 /* Access CD info via alignment row number */
431 /* ========================================== */
432 
433 bool CCdCore::GetGI(int Row, TGi& GI, bool ignorePDBs) {
434 //-------------------------------------------------------------------------
435 // get the GI for Row
436 //-------------------------------------------------------------------------
437  CRef< CSeq_id > SeqID;
438  int Pair, DenDiagRow;
439 
440  Pair = (Row <= 1) ? 0 : Row-1;
441  DenDiagRow = (Row == 0) ? 0 : 1;
442  GetSeqIDForRow(Pair, DenDiagRow, SeqID);
443  if (SeqID->IsGi()) {
444  GI = SeqID->GetGi();
445  return(true);
446  } else if (SeqID->IsPdb() && !ignorePDBs) { // to match AlignmentCollection behavior
447  GI = GetGIFromSequenceList(GetSeqIndex(SeqID));
448  return true;
449  }
450  return(false);
451 }
452 
453 
454 bool CCdCore::GetPDB(int Row, const CPDB_seq_id*& pPDB) {
455 //-------------------------------------------------------------------------
456 // get the PDB ID for Row
457 //-------------------------------------------------------------------------
458  CRef< CSeq_id > SeqID;
459  int Pair, DenDiagRow;
460 
461  Pair = (Row <= 1) ? 0 : Row-1;
462  DenDiagRow = (Row == 0) ? 0 : 1;
463  GetSeqIDForRow(Pair, DenDiagRow, SeqID);
464  if (SeqID->IsPdb()) {
465  pPDB = &(SeqID->GetPdb());
466  return(true);
467  }
468  return(false);
469 }
470 
471 int CCdCore::GetLowerBound(int Row) const {
472 //-------------------------------------------------------------------------
473 // get the lower alignment boundary for Row
474 // return INVALID_MAPPED_POSITION on failure
475 //-------------------------------------------------------------------------
476  CRef< CDense_diag > DenDiag;
477  CDense_diag::TStarts::const_iterator i;
478 
479  const CRef< CSeq_align >& seqAlign = GetSeqAlign(Row);
480  if (seqAlign.NotEmpty() && GetFirstOrLastDenDiag(seqAlign, true, DenDiag)) {
481  i = DenDiag->GetStarts().begin();
482  if (Row != 0) {
483  i++;
484  }
485  return(*i);
486  }
487  return INVALID_POSITION;
488 }
489 
490 int CCdCore::GetUpperBound(int Row) const {
491 //-------------------------------------------------------------------------
492 // get the upper alignment boundary for Row
493 // return INVALID_MAPPED_POSITION on failure
494 //-------------------------------------------------------------------------
495  CRef< CDense_diag > DenDiag;
496  CDense_diag::TStarts::const_iterator i;
497 
498  const CRef< CSeq_align >& seqAlign = GetSeqAlign(Row);
499  if (seqAlign.NotEmpty() && GetFirstOrLastDenDiag(seqAlign, false, DenDiag)) {
500  i = DenDiag->GetStarts().begin();
501  if (Row != 0) {
502  i++;
503  }
504  return((*i + DenDiag->GetLen()) - 1);
505  }
506  return INVALID_POSITION;
507 }
508 
509 
510 bool CCdCore::Get_GI_or_PDB_String_FromAlignment(int RowIndex, std::string& Str, bool Pad, int Len) const {
511 //-------------------------------------------------------------------
512 // get seq-id string for RowIndex of alignment
513 //-------------------------------------------------------------------
514  int Pair = (RowIndex <= 1) ? 0 : RowIndex-1;
515  int DenDiagRow = (RowIndex == 0) ? 0 : 1;
516  CRef< CSeq_id > SeqID;
517 
518  GetSeqIDForRow(Pair, DenDiagRow, SeqID);
519  if (SeqID->IsGi() || SeqID->IsPdb()) {
520  Str += Make_SeqID_String(SeqID, Pad, Len);
521  } else {
522  Str += "<Non-gi/pdb Sequence Types Unsupported>";
523  }
524 
525  return(true);
526 }
527 
528 
529 bool CCdCore::GetSeqEntryForRow(int rowId, CRef< CSeq_entry >& seqEntry) const {
530 
531  bool result = false;
532  CRef< CSeq_id > seqID;
533  list< CRef< CSeq_id > >::const_iterator sici;
534  list< CRef< CSeq_entry > >::const_iterator seci, seci_start, seci_end;
535 
536  if (GetSeqIDFromAlignment(rowId, seqID)) {
537  if (IsSetSequences()) {
538  if (GetSequences().IsSet()) {
539  seci_start = GetSequences().GetSet().GetSeq_set().begin();
540  seci_end = GetSequences().GetSet().GetSeq_set().end();
541  for (seci = seci_start; seci != seci_end && result == false; ++seci) {
542  if ((*seci)->IsSeq()) {
543  for (sici = (*seci)->GetSeq().GetId().begin();
544  sici != (*seci)->GetSeq().GetId().end() && result == false; ++sici) {
545  if (seqID->Match(**sici)) {
546  result = true;
547  seqEntry = *seci;
548  }
549  }
550  }
551  }
552  }
553  }
554  }
555 
556  return result;
557 }
558 
559 
560 // get the bioseq for the designated alignment row (for editing)
561 bool CCdCore::GetBioseqForRow(int rowId, CRef< CBioseq >& bioseq) {
562 
563  int seqIndex = GetSeqIndexForRowIndex(rowId);
564  return GetBioseqForIndex(seqIndex, bioseq);
565 }
566 
567 // find the species string for alignment row
568 string CCdCore::GetSpeciesForRow(int Row) {
569  CRef< CBioseq > bioseq; // = GetBioseqForRow(Row);
570  if (GetBioseqForRow(Row, bioseq) && !bioseq.IsNull()) {
571  return GetSpeciesFromBioseq(*bioseq);// rework GetSpecies to use other stuff...
572  }
573  return kEmptyStr;
574 }
575 
576 
577 // get the sequence for specified row
579  int seqIndex = GetSeqIndexForRowIndex(rowId);
580  return GetSequenceStringByIndex(seqIndex);
581 }
582 
583 
584 /* ========================================== */
585 /* Access CD info via a sequence list index */
586 /* ========================================== */
587 
588 TGi CCdCore::GetGIFromSequenceList(int SeqIndex) const {
589 //-------------------------------------------------------------------------
590 // get GI from the list of sequences.
591 // return -1 if no GI is found.
592 //-------------------------------------------------------------------------
593  list< CRef< CSeq_entry > >::const_iterator i;
594  list< CRef< CSeq_id > >::const_iterator j;
595  int SeqCount, IDCount;
596 
597  if (IsSetSequences()) {
598  if (GetSequences().IsSet()) {
599  // count to the SeqIndex sequence
600  SeqCount = 0;
601  for (i=GetSequences().GetSet().GetSeq_set().begin();
602  i!=GetSequences().GetSet().GetSeq_set().end(); i++) {
603  if (SeqCount == SeqIndex) {
604  if ((*i)->IsSeq()) {
605  // look through IDs for a gi
606  IDCount = 0;
607  for (j = (*i)->GetSeq().GetId().begin();
608  j != (*i)->GetSeq().GetId().end(); j++) {
609  if ((*j)->IsGi()) {
610  return((*j)->GetGi());
611  }
612  IDCount++;
613  }
614  }
615  }
616  SeqCount++;
617  if (SeqCount > SeqIndex) break;
618  }
619  }
620  }
621  return INVALID_GI;
622 }
623 
624 
625 string CCdCore::GetDefline(int SeqIndex) const {
626 //-------------------------------------------------------------------------
627 // get a description for the SeqIndex sequence
628 //-------------------------------------------------------------------------
629  list< CRef< CSeq_entry > >::const_iterator i;
630  list< CRef< CSeqdesc > >::const_iterator j;
631  int SeqCount;
632  string Description = kEmptyStr;
633 
634  if (IsSetSequences()) {
635  if (GetSequences().IsSet()) {
636  // count to the SeqIndex sequence
637  SeqCount = 0;
638  for (i=GetSequences().GetSet().GetSeq_set().begin();
639  i!=GetSequences().GetSet().GetSeq_set().end(); i++) {
640  if (SeqCount == SeqIndex) {
641  if ((*i)->IsSeq()) {
642  if ((*i)->GetSeq().IsSetDescr()) {
643  // look through the sequence descriptions
644  for (j=(*i)->GetSeq().GetDescr().Get().begin();
645  j!=(*i)->GetSeq().GetDescr().Get().end(); j++) {
646  // if there's a title, return that description
647  if ((*j)->IsTitle()) {
648  return((*j)->GetTitle());
649  }
650  // if there's a pdb description, return it
651  if ((*j)->IsPdb()) {
652  if ((*j)->GetPdb().GetCompound().size() > 0) {
653  return((*j)->GetPdb().GetCompound().front());
654  }
655  }
656  }
657  }
658  }
659  }
660  SeqCount++;
661  if (SeqCount > SeqIndex) break;
662  }
663  }
664  }
665  return(Description);
666 }
667 
668 
669 // find the species string for sequence list index
670 string CCdCore::GetSpeciesForIndex(int SeqIndex) {
671  CRef< CBioseq > bioseq; // = GetBioseqForIndex(SeqIndex);
672  if (GetBioseqForIndex(SeqIndex, bioseq) && !bioseq.IsNull()) {
673  return GetSpeciesFromBioseq(*bioseq);
674  }
675  return kEmptyStr;
676 }
677 
678 
679 bool CCdCore::GetSeqEntryForIndex(int seqIndex, CRef< CSeq_entry > & seqEntry) const
680 {
681  list< CRef< CSeq_entry > >::const_iterator i, iend;
682 
683  int SeqCount = 0;
684  if (seqIndex >= 0 && seqIndex < GetNumSequences() && IsSetSequences()) {
685  if (GetSequences().IsSet()) {
686  iend = GetSequences().GetSet().GetSeq_set().end();
687  for (i = GetSequences().GetSet().GetSeq_set().begin(); i != iend; ++i) {
688  if (SeqCount == seqIndex) {
689  seqEntry = (*i);
690  return true;
691  }
692  SeqCount++;
693  }
694  }
695  }
696  seqEntry.Reset();
697  return false;
698 
699 }
700 
701 // get the bioseq for the designated sequence index
702 bool CCdCore::GetBioseqForIndex(int seqIndex, CRef< CBioseq >& bioseq) {
703  list< CRef< CSeq_entry > >::iterator i, iend;
704 
705  int SeqCount = 0;
706  if (seqIndex >= 0 && seqIndex < GetNumSequences() && IsSetSequences()) {
707  if (SetSequences().IsSet()) {
708  iend = SetSequences().SetSet().SetSeq_set().end();
709  for (i = SetSequences().SetSet().SetSeq_set().begin(); i != iend; ++i) {
710  if (SeqCount == seqIndex && (*i)->IsSeq()) {
711  bioseq.Reset(&(*i)->SetSeq());
712  return true;
713  }
714  SeqCount++;
715  }
716  }
717  }
718  bioseq.Reset();
719  return false;
720 }
721 
722 
723 // get the sequence for specified sequence index
724 string CCdCore::GetSequenceStringByIndex(int seqIndex) {
725  string s = kEmptyStr;
726  CRef< CBioseq > bioseq;
727  if (GetBioseqForIndex(seqIndex, bioseq) && GetNcbieaaString(*bioseq, s)) {
728  return s;
729  }
730  return s;
731 }
732 
733 
734 /* =========================================== */
735 /* Examine alignment for a SeqId or footprint */
736 /* =========================================== */
737 
738 bool CCdCore::HasSeqId(const CRef< CSeq_id >& ID) const {
739 //-------------------------------------------------------------------------
740 // look through each row of the alignment & pending list for a matching ID
741 //-------------------------------------------------------------------------
742  int Dummy;
743  return(HasSeqId(ID, Dummy));
744 }
745 
746 
747 bool CCdCore::HasSeqId(const CRef< CSeq_id >& ID, int& RowIndex) const {
748 //-------------------------------------------------------------------------------
749 // look through each row of the alignment for a matching ID
750 // also look through each row of the pending list for a matching ID -- dih & vvs
751 //-------------------------------------------------------------------------------
752  int k, Pair, DenDiagRow, NumRows=GetNumRows();
753  CRef< CSeq_id > TestID;
754 
755  for (k=0; k<NumRows; k++) {
756  Pair = (k <= 1) ? 0 : k-1;
757  DenDiagRow = (k == 0) ? 0 : 1;
758  GetSeqIDForRow(Pair, DenDiagRow, TestID);
759  if (SeqIdsMatch(ID, TestID)) {
760  RowIndex = k;
761  return(true);
762  }
763  }
764 
765  k = 0;
766  list <CRef <CUpdate_align> > ::const_iterator pPen;
767  for(pPen=GetPending().begin();pPen!=GetPending().end();pPen++){
768  const CSeq_align * pAl = *((*pPen)->GetSeqannot().GetData().GetAlign().begin());
769  const CDense_diag * pDDPen=*(pAl->GetSegs().GetDendiag().begin());
770  vector < CRef< CSeq_id > >::const_iterator pid=pDDPen->GetIds().begin();
771  TestID=*(++pid);
772  if (SeqIdsMatch(ID, TestID)) {
773  RowIndex = k;
774  return(true);
775  }
776  k++;
777  }
778  return(false);
779 }
780 
781 
782 /* ====================================== */
783 /* SeqID getters ... from alignment info */
784 /* ====================================== */
785 
786 
787 bool CCdCore::GetSeqIDForRow(int Pair, int DenDiagRow, CRef< CSeq_id >& SeqID) const {
788 //-------------------------------------------------------------------------
789 // get a SeqID.
790 // first get the Pair'th DenDiag, then the DenDiagRow'th SeqID.
791 //-------------------------------------------------------------------------
792  CRef< CDense_diag > DenDiag;
793  CDense_diag::TIds IdsSet;
794  CDense_diag::TIds::iterator i;
795  int Row;
796 
797  Row = (Pair == 0) ? DenDiagRow : Pair+1;
798 
799  const CRef< CSeq_align >& seqAlign = GetSeqAlign(Row);
800  if (seqAlign.NotEmpty() && GetFirstOrLastDenDiag(seqAlign, true, DenDiag)) {
801  IdsSet = DenDiag->GetIds();
802  // for Row=0, get the first id, otherwise get the 2nd id
803  i = IdsSet.begin();
804  if (DenDiagRow != 0) {
805  i++;
806  }
807  SeqID = (*i);
808  return(SeqID.NotEmpty());
809  }
810  return(false);
811 }
812 
813 
814 // Returns false if an empty SeqID is found
815 bool CCdCore::GetSeqIDFromAlignment(int RowIndex, CRef<CSeq_id>& SeqID) const { // get SeqID from alignment
816  if (RowIndex < 0) {
817  return false;
818  }
819  int Pair = (RowIndex <= 1) ? 0 : RowIndex-1;
820  int DenDiagRow = (RowIndex == 0) ? 0 : 1;
821  return(GetSeqIDForRow(Pair, DenDiagRow, SeqID));
822 }
823 
824 
825 /* ====================================== */
826 /* SeqID getters ... from sequence list */
827 /* ====================================== */
828 
829 bool CCdCore::GetSeqIDForIndex(int SeqIndex, CRef< CSeq_id >& SeqID) const {
830 //-------------------------------------------------------------------------
831 // get a SeqID from a list of sequences.
832 // each sequence can have multiple id's.
833 // if there's a pdb-id, return it.
834 // otherwise, if there's a gi, return it.
835 // otherwise, return false.
836 // return false if the SeqID is empty
837 //-------------------------------------------------------------------------
838  list< CRef< CSeq_entry > >::const_iterator i;
839  list< CRef< CSeq_id > >::const_iterator j;
840  int SeqCount, IDCount, NumIDs;
841 
842  if (IsSetSequences()) {
843  if (GetSequences().IsSet()) {
844  // count to the SeqIndex sequence
845  SeqCount = 0;
846  for (i=GetSequences().GetSet().GetSeq_set().begin();
847  i!=GetSequences().GetSet().GetSeq_set().end(); i++) {
848  if (SeqCount == SeqIndex) {
849  if ((*i)->IsSeq()) {
850  // look through the IDs for a PDB id
851  NumIDs = (*i)->GetSeq().GetId().size();
852  IDCount = 0;
853  for (j = (*i)->GetSeq().GetId().begin();
854  j != (*i)->GetSeq().GetId().end(); j++) {
855  if ((*j)->IsPdb()) {
856  SeqID = (*j);
857  return(SeqID.NotEmpty());
858 // return(true);
859  }
860  IDCount++;
861  }
862  // look through IDs again for a gi
863  IDCount = 0;
864  for (j = (*i)->GetSeq().GetId().begin();
865  j != (*i)->GetSeq().GetId().end(); j++) {
866  if ((*j)->IsGi()) {
867  SeqID = (*j);
868  return(SeqID.NotEmpty());
869 // return(true);
870  }
871  IDCount++;
872  }
873  // look through IDs again for a Other
874  IDCount = 0;
875  for (j = (*i)->GetSeq().GetId().begin();
876  j != (*i)->GetSeq().GetId().end(); j++) {
877  if ((*j)->IsOther()) {
878  SeqID = (*j);
879  return(SeqID.NotEmpty());
880 // return(true);
881  }
882  IDCount++;
883  }
884  if (NumIDs > 0)
885  {
886  SeqID = *((*i)->GetSeq().GetId().begin());
887  return (SeqID.NotEmpty());
888  }
889  }
890  return(false);
891  }
892  SeqCount++;
893  }
894  }
895  }
896  return(false);
897 }
898 
899 
900 bool CCdCore::GetSeqIDs(int SeqIndex, list< CRef< CSeq_id > >& SeqIDs) {
901 //-------------------------------------------------------------------------
902 // get the list of SeqIDs for a sequence
903 //-------------------------------------------------------------------------
904  list< CRef< CSeq_entry > >::const_iterator i;
905  int SeqCount;
906 
907  if (IsSetSequences()) {
908  if (GetSequences().IsSet()) {
909  // count to the SeqIndex sequence
910  SeqCount = 0;
911  for (i=GetSequences().GetSet().GetSeq_set().begin();
912  i!=GetSequences().GetSet().GetSeq_set().end(); i++) {
913  if (SeqCount == SeqIndex) {
914  if ((*i)->IsSeq()) {
915  // return its set of ids
916  SeqIDs = (*i)->GetSeq().GetId();
917  return(true);
918  }
919  }
920  SeqCount++;
921  if (SeqCount > SeqIndex) break;
922  }
923  }
924  }
925  return(false);
926 }
927 
928 // Assumes proper index is in range and CD in proper format for retrieval; no checks.
929 const list< CRef< CSeq_id > >& CCdCore::GetSeqIDs(int SeqIndex) const{
930  list< CRef< CSeq_entry > >::const_iterator i=GetSequences().GetSet().GetSeq_set().begin();
931  int SeqCount = 0;
932 
933  while (i != GetSequences().GetSet().GetSeq_set().end()) {
934  if (SeqCount == SeqIndex) {
935  break;
936  }
937  ++SeqCount;
938  ++i;
939  }
940  // return its set of ids
941  return (*i)->GetSeq().GetId();
942 }
943 
944 /* ============= */
945 /* Row removal */
946 /* ============= */
947 
948 
// Three-way comparison callback over a vector<int> passed as an opaque
// pointer.
//   pVal  points at a vector<int>.
//   i, j  positions in that vector to compare.
// Returns 1 when element i is greater than element j, -1 when it is smaller,
// and 0 when they are equal.
int intSortRowsFunction(void * pVal, int i, int j)
{
    const std::vector<int>& values = *static_cast<std::vector<int>*>(pVal);
    const int lhs = values[i];
    const int rhs = values[j];
    if (lhs == rhs) {
        return 0;
    }
    return (lhs > rhs) ? 1 : -1;
}
956 
957 
958 bool CCdCore::EraseTheseRows(const std::vector<int>& TossRows) {
959 //-------------------------------------------------------------------------
960 // erase rows from the alignment.
961 // EraseRows is a list of rows that are deleted from this CD.
962 // EraseRows won't contain the master (0 index) row.
963 //-------------------------------------------------------------------------
964  int i;
965 
966  int Count = 0;
967 
968  CRef< CSeq_annot > alignment;
969  if (!GetAlignment(alignment)) {
970  return false;
971  }
972 
973  int * ind=new int[3*TossRows.size()];
974  algSortQuickCallbackIndex((void * )&TossRows,TossRows.size(),ind+TossRows.size(),ind,intSortRowsFunction);
975 
976  for (i=TossRows.size()-1; i>=0; i--) {
977  if (TossRows[ind[i]] == 0) {
978  delete [] ind;
979  return(false); // return false if master row is to be deleted
980  }
981  if (!EraseRow(alignment, TossRows[ind[i]])) {
982  delete [] ind;
983  return(false); // return false if problem deleting a row
984  }
985  Count++;
986  }
987 
988  delete [] ind;
989 
990  return(true);
991 }
992 
993 
994 bool CCdCore::EraseOtherRows(const std::vector<int>& KeepRows) {
995 //-------------------------------------------------------------------------
996 // erase rows from the alignment.
997 // KeepRows is a list of rows that are NOT deleted from this CD.
998 // KeepRows won't contain the master (0 index) row.
999 // return of true means successful completion.
1000 //-------------------------------------------------------------------------
1001 
1002  int j, k, NumRows;
1003  bool FoundIt;
1004 
1005  CRef< CSeq_annot > alignment;
1006  if (!GetAlignment(alignment)) {
1007  return false;
1008  }
1009 
1010  NumRows = alignment->SetData().SetAlign().size() + 1;
1011  for (j=NumRows-1; j>0; j--) {
1012  // see if row is in KeepRows
1013  FoundIt = false;
1014  for (k=0; k<(int)KeepRows.size(); k++) {
1015  if (KeepRows[k] == j) {
1016  FoundIt = true;
1017  break;
1018  }
1019  }
1020  // if row is not in KeepRows, then erase it
1021  if (!FoundIt) {
1022  if (!EraseRow(alignment, j)) {
1023  return(false);
1024  }
1025  }
1026  }
1027 
1028  return(true);
1029 }
1030 
1031 
1033 //-------------------------------------------------------------------------
1034 // erase sequences not in alignment
1035 //-------------------------------------------------------------------------
1036  bool hasId;
1037  int i;
1038  int NumSequences = GetNumSequences();
1039  set<int> indicesToErase;
1040  set<int>::reverse_iterator rit, ritEnd;
1041 
1042  CBioseq::TId::const_iterator idCit, idCend;
1043  CBioseq_set::TSeq_set::const_iterator seCit = GetSequences().GetSet().GetSeq_set().begin();
1044  CBioseq_set::TSeq_set::const_iterator seCend = GetSequences().GetSet().GetSeq_set().end();
1045 
1046  // Note: GetSeqIDForIndex only checks one of the possible IDs against those in the alignment;
1047  // look through all possible IDs them before erasing a sequence.
1048 
1049  for (i=0; seCit != seCend && i<NumSequences; ++seCit, i++) {
1050  hasId = false;
1051  if ((*seCit)->IsSeq()) {
1052  const CBioseq::TId& ids = (*seCit)->GetSeq().GetId();
1053  idCend = ids.end();
1054  for (idCit = ids.begin(); idCit != idCend; ++idCit) {
1055  if (HasSeqId(*idCit)) {
1056  hasId = true;
1057  break;
1058  }
1059  }
1060  if (!hasId) {
1061  indicesToErase.insert(i);
1062  }
1063  }
1064  }
1065 
1066  // Erase in reverse order so iterators don't get invalidated.
1067  if (indicesToErase.size() > 0) {
1068  ritEnd = indicesToErase.rend();
1069  for (rit = indicesToErase.rbegin(); rit != ritEnd; ++rit) {
1070  EraseSequence(*rit);
1071  }
1072  }
1073 
1074 }
1075 
1076 
1077 void CCdCore::EraseSequence(int SeqIndex) {
1078 //-------------------------------------------------------------------------
1079 // erase a sequence from the set of sequences
1080 //-------------------------------------------------------------------------
1081  list< CRef< CSeq_entry > >::iterator i;
1082  int SeqCount;
1083 
1084  if (IsSetSequences()) {
1085  if (GetSequences().IsSet()) {
1086  SeqCount = 0;
1087  for (i=SetSequences().SetSet().SetSeq_set().begin();
1088  i!=SetSequences().SetSet().SetSeq_set().end(); i++) {
1089  if (SeqCount == SeqIndex) {
1090  SetSequences().SetSet().SetSeq_set().erase(i);
1091  return;
1092  }
1093  SeqCount++;
1094  if (SeqCount > SeqIndex) break;
1095  }
1096  }
1097  }
1098 }
1099 
1100 /* ================================================================ */
1101 /* Methods for adding alignment or sequence to CD */
1102 /* ================================================================ */
1103 
// NOTE(review): the signature line of this function (listing line 1104) is
// absent from this extract; the body reads a parameter named seqAlign.
// Appends a copy of *seqAlign (made with Assign) to the Seq-align list of the
// first Seq-annot and always returns true.
// NOTE(review): SetSeqannot().begin() is dereferenced without an emptiness
// check -- confirm callers guarantee at least one Seq-annot.
1105 {
1106  CRef< CSeq_align > sa(new CSeq_align());
1107  sa->Assign(*seqAlign);
1108  (*(SetSeqannot().begin()))->SetData().SetAlign().push_back(sa);
1109  return true;
1110 }
1111 
1113 {
1114  CRef< CSeq_align > sa(new CSeq_align());
1115  sa->Assign(*seqAlign);
1116  CRef< CUpdate_align > newPend ( new CUpdate_align);
1118 
1119  newPend->SetSeqannot().SetData().SetAlign().push_back(sa); // copy the alignment to new pending alignmnet
1120  com->SetComment ("Sequence aligns to the CD partially.");
1121  newPend->SetDescription().push_back(com);
1122  newPend->SetType(CUpdate_align::eType_other);
1123  SetPending().push_back(newPend);
1124  return true;
1125 }
1126 
// NOTE(review): the signature line (listing line 1127) is absent from this
// extract; the body reads a parameter named rows that is iterated as a
// set<int>.
// Calls ErasePendingRow() for each index in rows, from the largest index to
// the smallest, then calls EraseSequences().
1128 {
1129  for (set<int>::reverse_iterator sit = rows.rbegin(); sit != rows.rend(); sit++)
1130  {
1131  ErasePendingRow(*sit);
1132  }
1133  EraseSequences();
1134 }
1135 
// NOTE(review): the signature line (listing line 1136) is absent from this
// extract; the body reads a parameter named row.
// Erases the row-th element (0-based) of the pending list.  Nothing is erased
// when row does not address an existing element.
1137 {
1138  list< CRef< CUpdate_align > >& pendingList = SetPending();
1139  list< CRef< CUpdate_align > >::iterator lit = pendingList.begin();
1140  int order = 0;
1141  for(; lit != pendingList.end(); ++lit)
1142  {
1143  if (order== row)
1144  {
// The loop stops right after the erase, so the invalidated iterator is
// never used again.
1145  pendingList.erase(lit);
1146  break;
1147  }
1148  else
1149  order++;
1150  }
1151 }
1152 
1154 {
1155  CRef< CSeq_entry > newSeq(new CSeq_entry());
1156  newSeq->Assign(*srcSeq);
1157  SetSequences().SetSet().SetSeq_set().push_back(newSeq);
1158  return true;
1159 }
1161 {
1162  (*(SetSeqannot().begin()))->SetData().SetAlign().clear();
1163  SetPending().clear();
1164  SetSequences().SetSet().SetSeq_set().clear();
1165 }
1166 /* ================================================================ */
1167 /* Methods for structures, structure alignments, MMDB identifiers */
1168 /* ================================================================ */
1169 
1170 bool CCdCore::SynchronizeMaster3D(bool checkRow1WhenConsensusMaster)
1171 {
1172  bool result = false;
1173  CRef< CSeq_id > masterPdbId(new CSeq_id);
1174 
1175  ResetMaster3d();
1176  if (Has3DMaster()) {
1177 
1178  // this should *always* be true but just in case...) {
1179  if (GetSeqIDForRow(0, 0, masterPdbId) && masterPdbId->IsPdb()) {
1180  SetMaster3d().push_back(masterPdbId);
1181  result = true;
1182  }
1183 
1184  } else if (checkRow1WhenConsensusMaster && UsesConsensusSequenceAsMaster()) {
1185 
1186  // If the first row is a structure, then this will be the master3d entry
1187  // after the consensus has been removed.
1188  if (GetSeqIDForRow(0, 1, masterPdbId) && masterPdbId->IsPdb()) {
1189  SetMaster3d().push_back(masterPdbId);
1190  result = true;
1191  }
1192  }
1193 
1194  return result;
1195 }
1196 
1197 bool CCdCore::Has3DMaster() const {
1198 //-------------------------------------------------------------------------
1199 // confirm if this CD has a structure as its master
1200 // this must be true for all Seq_aligns in the Cdd object
1201 //-------------------------------------------------------------------------
1202 
1203  bool result = true;
1204  bool tmp_result = false;
1205  CRef< CSeq_align > salist;
1206  TDendiag ddlist;
1207  list< CRef< CSeq_annot > >::const_iterator sanci;
1208 
1209  if (IsSetSeqannot()) {
1210  for (sanci = GetSeqannot().begin(); sanci != GetSeqannot().end(); ++sanci) {
1211  tmp_result = false;
1212  if ((*sanci)->GetData().IsAlign()) {
1213  salist = (*sanci)->GetData().GetAlign().front();
1214  if (salist->GetSegs().IsDendiag()) {
1215  ddlist = salist->GetSegs().GetDendiag();
1216  if (ddlist.front()->GetIds().front()->IsPdb()) {
1217  tmp_result = true;
1218  }
1219  }
1220  }
1221  result = result & tmp_result;
1222  }
1223  }
1224  return result;
1225 }
1226 
// NOTE(review): the signature line (listing line 1227) is absent from this
// extract.
1228 //-------------------------------------------------------------------------
1229 // return the number of structure-related alignments in a CD,
1230 // ignoring the alignment to a consensus sequence if present.
// An alignment is counted when it is a dense-diag alignment and the last
// Seq-id of its first dense-diag is a PDB id.  Zero is returned outright
// when the master is neither a consensus nor a structure.
1231 //-------------------------------------------------------------------------
1232
1233  int count = 0;
1234
1235  TDendiag ddlist;
1236  list< CRef< CSeq_annot > >::const_iterator sanci;
1237  list< CRef< CSeq_align > >::const_iterator saci;
1238
1239  bool usesConsensus = UsesConsensusSequenceAsMaster();
1240  bool structMaster = Has3DMaster();
1241
1242  if (!usesConsensus && !structMaster) {
1243  return count;
1244  }
1245  if (IsSetSeqannot()) {
1246  for (sanci = GetSeqannot().begin(); sanci != GetSeqannot().end(); ++sanci) {
1247  if ((*sanci)->GetData().IsAlign()) {
1248  for (saci = (*sanci)->GetData().GetAlign().begin(); \
1249  saci != (*sanci)->GetData().GetAlign().end(); ++saci) {
1250  if ((*saci)->GetSegs().IsDendiag()) {
// NOTE(review): this assignment copies the dense-diag list, and front() is
// used without an emptiness check.
1251  ddlist=(*saci)->GetSegs().GetDendiag();
1252  if (ddlist.front()->GetIds().back()->IsPdb()) {
1253  ++count;
1254  }
1255  }
1256  }
1257  }
1258  }
1259  }
// With a consensus master, one counted alignment is subtracted.
1260  if (count > 0 && usesConsensus) {
1261  --count;
1262  }
1263
1264  return count;
1265 }
1266 
1267 
1268 bool CCdCore::GetRowsForMmdbId(int mmdbId, list<int>& rows) const {
1269 
1270  int rowMmdbId= -1;
1271  int seqIndex = -1;
1272 
1273  rows.clear();
1274  if (mmdbId < 0) {
1275  return false;
1276  }
1277 
1278  for (int rowIndex=0; rowIndex<GetNumRows(); rowIndex++) {
1279  rowMmdbId = -1;
1280  seqIndex = GetSeqIndexForRowIndex(rowIndex);
1281  if (seqIndex > 0) {
1282  if (GetMmdbId(seqIndex, rowMmdbId) && (rowMmdbId == mmdbId)) {
1283  rows.push_back(rowIndex);
1284  }
1285  }
1286  }
1287  if (rows.size() > 0) {
1288  return true;
1289  }
1290  return false;
1291 }
1292 
1293 bool CCdCore::GetRowsWithMmdbId(vector<int>& rows) const {
1294 
1295  int rowMmdbId= -1;
1296  int seqIndex = -1;
1297 
1298  //rows.clear();
1299  for (int rowIndex=0; rowIndex<GetNumRows(); rowIndex++) {
1300  rowMmdbId = -1;
1301  seqIndex = GetSeqIndexForRowIndex(rowIndex);
1302  if (seqIndex >= 0) {
1303  if (GetMmdbId(seqIndex, rowMmdbId)) {
1304  rows.push_back(rowIndex);
1305  }
1306  }
1307  }
1308  if (rows.size() > 0) {
1309  return true;
1310  }
1311  return false;
1312 }
1313 
1314 bool CCdCore::GetMmdbId(int SeqIndex, int& id) const{
1315 //-------------------------------------------------------------------------
1316 // get mmdb-id from sequence list
1317 //-------------------------------------------------------------------------
1318  list< CRef< CSeq_entry > >::const_iterator i;
1319 
1320  int SeqCount;
1321 
1322  if (SeqIndex < 0) {
1323  return false;
1324  }
1325 
1326  if (IsSetSequences()) {
1327  if (GetSequences().IsSet()) {
1328  SeqCount = 0;
1329  // look through each sequence in set for SeqIndex sequence
1330  for (i=GetSequences().GetSet().GetSeq_set().begin();
1331  i!=GetSequences().GetSet().GetSeq_set().end(); i++) {
1332  if (SeqCount == SeqIndex) {
1333  if ((*i)->IsSeq()) {
1334  id = GetMMDBId((*i)->GetSeq()); // library call
1335  if (id > 0) {
1336  return(true);
1337  }
1338  }
1339  }
1340  SeqCount++;
1341  if (SeqCount > SeqIndex) break;
1342  }
1343  }
1344  }
1345  return(false);
1346 }
1347 
1348 /* ====================== */
1349 /* CD alignment methods */
1350 /* ====================== */
1351 
1352 // Return the first seqAnnot of type 'align'
1354  list< CRef< CSeq_annot > >::const_iterator sancit;
1355  if (IsSetSeqannot()) {
1356  for (sancit = GetSeqannot().begin(); sancit != GetSeqannot().end(); ++sancit) {
1357  if ((*sancit)->GetData().IsAlign()) {
1358  return *sancit;
1359  }
1360  }
1361  }
1362  return EMPTY_CREF_SEQANNOT;
1363 }
1364 
1365 // Return the first seqAnnot of type 'align'
// NOTE(review): the signature line (listing line 1366) is absent from this
// extract; the body writes to a parameter named seqAnnot.
// seqAnnot is reset to null first and then set to the first Seq-annot whose
// data is an alignment.  Returns true when such a Seq-annot was found.
1367  list< CRef< CSeq_annot > >::iterator sanit;
1368  int count = 0;
1369
1370  seqAnnot = null;
1371  if (IsSetSeqannot()) {
// The 'count == 0' condition ends the loop after the first match.
1372  for (sanit = SetSeqannot().begin(); count == 0 && sanit != SetSeqannot().end(); ++sanit) {
1373  if ((*sanit)->SetData().IsAlign()) {
1374  ++count;
1375  seqAnnot = (*sanit);
1376  }
1377  }
1378  }
1379  return (count == 1);
1380 }
1381 
1382 
1383 bool CCdCore::IsSeqAligns() const {
1384 //-------------------------------------------------------------------------
1385 // check if there are seq-aligns
1386 //-------------------------------------------------------------------------
1387  list< CRef< CSeq_annot > >::const_iterator i;
1388 
1389  if (IsSetSeqannot()) {
1390  i = GetSeqannot().begin();
1391  if ((*i)->GetData().IsAlign()) {
1392  return(true);
1393  }
1394  }
1395  return(false);
1396 }
1397 
1398 const list< CRef< CSeq_align > >& CCdCore::GetSeqAligns() const {
1399 //-------------------------------------------------------------------------
1400 // get the seq-aligns. Must know they're present (call IsSeqAligns() first)
1401 // Assumes the first seq_annot is the alignment.
1402 //-------------------------------------------------------------------------
1403  list< CRef< CSeq_annot > >::const_iterator i;
1404 
1405  i = GetSeqannot().begin();
1406  return((*i)->GetData().GetAlign());
1407 }
1408 
1409 
1410 list< CRef< CSeq_align > >& CCdCore::GetSeqAligns() {
1411 //-------------------------------------------------------------------------
1412 // get the list of Seq-aligns for editing
1413 // Empty list returned when none are present.
1414 //-------------------------------------------------------------------------
1415  return SetSeqannot().front()->SetData().SetAlign();
1416 }
1417 
1418 // get the Row-th Seq-align
1419 bool CCdCore::GetSeqAlign(int Row, CRef< CSeq_align >& seqAlign) {
1420 
1421  list< CRef< CSeq_align > >::iterator j;
1422 
1423  if (IsSeqAligns() && Row >= 0) {
1424  list< CRef< CSeq_align > > lsa = GetSeqAligns();
1425  // figure out which seq-align to get (based on Row)
1426  if (Row == 0) {
1427  seqAlign = lsa.front();
1428  return true;
1429  } else {
1430  int Count = 0;
1431  for (j = lsa.begin(); j != lsa.end(); j++) {
1432  if (++Count == Row) {
1433  seqAlign = *j;
1434  return true;
1435  }
1436  }
1437  }
1438  }
1439  return false;
1440 }
1441 
1442 // get the Row-th Seq-align
1443 // Burden is placed on caller to ensure Row is not too large.
// NOTE(review): the signature line (listing line 1444) is absent from this
// extract; the body reads a parameter named Row.
// Rows 0 and 1 both yield the first Seq-align.  EMPTY_CREF_SEQALIGN is
// returned when IsSeqAligns() is false, Row is negative, or Row is beyond
// the end of the list.
1445
1446  int Count = 0;
1447
1448  if (IsSeqAligns() && Row >= 0) {
1449  if (Row == 0) {
1450  return (GetSeqAligns().front());
1451  } else {
1452  list< CRef< CSeq_align > >::const_iterator j, jend = GetSeqAligns().end();
1453  for (j = GetSeqAligns().begin(); j != jend; j++) {
1454  if (++Count == Row) {
1455  return (*j);
1456  }
1457  }
1458  }
1459
1460  }
1461  return EMPTY_CREF_SEQALIGN;
1462 }
1463 
1464 
1465 // Returns coordinate on 'otherRow' that is mapped to 'thisPos' on 'thisRow'.
1466 // Returns INVALID_MAPPED_POSITION on failure.
1467 int CCdCore::MapPositionToOtherRow(int thisRow, int thisPos, int otherRow) const {
1468 
1469  int masterPos, otherPos = INVALID_POSITION;
1470  if (thisPos < 0 || thisRow < 0 || otherRow < 0) {
1471  return otherPos;
1472  } else if (thisRow == otherRow) {
1473  return thisPos;
1474  }
1475 
1476  if (thisRow == 0) { // direct master->child mapping
1477  const CRef< CSeq_align >& seqalign = GetSeqAlign(otherRow);
1478  if (seqalign.NotEmpty()) {
1479  otherPos = MapPositionToChild(thisPos, *seqalign);
1480  }
1481  } else {
1482  const CRef< CSeq_align >& seqalign = GetSeqAlign(thisRow);
1483  if (seqalign.NotEmpty()) {
1484  masterPos = MapPositionToMaster(thisPos, *seqalign);
1485  if (otherRow != 0) { // child->child mapping
1486  const CRef< CSeq_align >& otherSeqalign = GetSeqAlign(otherRow);
1487  if (seqalign.NotEmpty()) {
1488  otherPos = MapPositionToChild(masterPos, *otherSeqalign);
1489  }
1490  } else { // child->master mapping
1491  otherPos = masterPos;
1492  }
1493  }
1494  }
1495  return otherPos;
1496 }
1497 
1498 // mapDir controls direction of mapping
1499 int CCdCore::MapPositionToOtherRow(const CRef< CSeq_align >& seqAlign, int thisPos, CoordMapDir mapDir) const {
1500  int otherPos = INVALID_POSITION;
1501  if (thisPos >= 0) {
1502  otherPos = (mapDir == CHILD_TO_MASTER) ? MapPositionToMaster(thisPos, *seqAlign)
1503  : MapPositionToChild(thisPos, *seqAlign);
1504  }
1505  return otherPos;
1506 }
1507 
1508 
// NOTE(review): the signature line (listing line 1509) is absent from this
// extract.
1510 //-------------------------------------------------------------------------
1511 // check if this CD has a consensus in the alignment or sequence list
1512 //-------------------------------------------------------------------------
1513  bool result = false;
1514  int nrows = GetNumRows();
1515
1516  CRef< CSeq_id > SeqID;
1517
// Scan the alignment rows and stop at the first consensus Seq-id.
1518  for (int i = 0; i < nrows; ++i) {
1519  if (GetSeqIDFromAlignment(i, SeqID)) {
1520  if (IsConsensus(SeqID)) {
1521  result = true;
1522  break;
1523  }
1524  }
1525  }
1526
1527  // If no consensus in the alignment, check the sequence list.
// NOTE(review): the statement inside this block (listing line 1529) is
// absent from this extract; presumably it assigns 'result' from a check of
// the sequence list -- confirm against the full source.
1528  if (!result) {
1530  }
1531
1532  return result;
1533 }
1534 
1535 
1536 bool CCdCore::FindConsensusInSequenceList(vector<int>* indices) const {
1537  bool result = false;
1538  int nseqs = GetNumSequences();
1539 
1540  // Make sure a consensus is not lurking in the sequence list...
1541  for (int i = 0; i < nseqs; ++i) {
1542  const list< CRef< CSeq_id > >& ids = GetSeqIDs(i);
1543  for (list<CRef< CSeq_id > >::const_iterator lit = ids.begin(); lit != ids.end(); ++lit) {
1544  if (IsConsensus(*lit)) {
1545  result = true;
1546  if (indices == NULL) {
1547  return result;
1548  }
1549  indices->push_back(i);
1550  break;
1551  }
1552  }
1553  }
1554 
1555  return(result);
1556 }
1557 
// NOTE(review): the signature line (listing line 1558) is absent from this
// extract.
1559 //-------------------------------------------------------------------------
1560 // check if this CD uses consensus sequence for master
// True only when a Seq-id can be read for row 0 of the alignment and
// IsConsensus() accepts it.
1561 //-------------------------------------------------------------------------
1562  CRef< CSeq_id > SeqID;
1563
1564  if (GetSeqIDFromAlignment(0, SeqID)) {
1565  if (IsConsensus(SeqID)) {
1566  return(true);
1567  }
1568  }
1569  return(false);
1570 }
1571 
1572 int CCdCore::GetRowsWithConsensus(vector<int>& consensusRows) const {
1573 //-------------------------------------------------------------------------
1574 // look for rows where the seq_id refers to a consensus sequence
1575 //-------------------------------------------------------------------------
1576 
1577  int nrows = GetNumRows();
1578  CRef< CSeq_id > SeqID;
1579 
1580  consensusRows.clear();
1581  for (int i = 0; i < nrows; ++i) {
1582  if (GetSeqIDFromAlignment(i, SeqID)) {
1583  if (IsConsensus(SeqID)) {
1584  consensusRows.push_back(i);
1585  }
1586  }
1587  }
1588  return consensusRows.size();
1589 }
1590 
1591 
// NOTE(review): the signature line (listing line 1592) is absent from this
// extract; the body reads parameters named oldComment and newComment.
1593 //-------------------------------------------------------------------------
1594 // set comment of CD
// The first comment descriptor equal to oldComment is overwritten with
// newComment; when none matches, a new comment descriptor is appended.
// Nothing is done when the CD has no description set at all.
1595 //-------------------------------------------------------------------------
1596  CCdd_descr_set::Tdata::iterator i;
1597
1598  if (IsSetDescription()) {
1599  // if comment is set, reset it
1600  for (i=SetDescription().Set().begin(); i!=SetDescription().Set().end(); i++) {
1601  if ((*i)->IsComment() && (*i)->GetComment() == oldComment) {
1602  (*i)->SetComment(newComment);
1603  return;
1604  }
1605  }
1606  // otherwise add another description with comment
1607  CRef < CCdd_descr > Comment(new CCdd_descr());
1608  Comment->SetComment(newComment);
1609  SetDescription().Set().push_back(Comment);
1610  }
1611 }
1612 
1613 /* ========================================== */
1614 /* Alignment & structure annotation methods */
1615 /* ========================================== */
1616 
1617 bool CCdCore::AllResiduesInRangeAligned(int rowId, int from, int to) const {
1618 
1619  int i = 0, tmp, nBlocks, nextStart;
1620  bool toFound = true, onMaster = (rowId == 0) ? true : false;
1621  vector<int> blockStarts, blockLen;
1622 
1623  if (from > to) {
1624  tmp = to;
1625  to = from;
1626  from = tmp;
1627  }
1628 
1629  const CRef< CSeq_align >& seqAlign = GetSeqAlign(rowId);
1630  i = GetBlockNumberForResidue(from, seqAlign, onMaster, &blockStarts, &blockLen);
1631  if (i >= 0) {
1632  nBlocks = blockStarts.size();
1633  while (i < nBlocks && toFound) {
1634  if (to >= blockStarts[i] + blockLen[i]) { // 'to' beyond block i
1635  nextStart = (i == nBlocks - 1) ? 1000000000 : blockStarts[i+1];
1636  if (to < nextStart) { // 'to' is in between blocks
1637  toFound = false;
1638  } else if (nextStart != blockStarts[i] + blockLen[i]) { // non-adjacent blocks
1639  toFound = false;
1640  }
1641  ++i;
1642  } else {
1643  i = nBlocks;
1644  }
1645  }
1646  }
1647 
1648  return (i >= 0 && toFound);
1649 }
1650 
1651 bool CCdCore::AlignAnnotsValid(string* err) const{
1652 //-------------------------------------------------------------------------
1653 // check if the alignannot's are covered by aligned blocks
1654 //-------------------------------------------------------------------------
1655  bool result = true;
1656  int intNumber;
1657  int From, To, NewFrom, NewTo;
1658 
1659  list< CRef< CAlign_annot > >::const_iterator m;
1660  list< CRef< CSeq_interval > >::const_iterator n;
1661 
1662  if (err) {
1663  err->erase();
1664  }
1665 
1666  // if there's an align-annot set
1667  const CRef< CSeq_align >& masterSeqAlign = GetSeqAlign(0);
1668  if (masterSeqAlign.NotEmpty() && IsSetAlignannot()) {
1669 
1670  // for each alignannot
1671  for (m=GetAlignannot().Get().begin(); m!=GetAlignannot().Get().end(); m++) {
1672  // if it's a from-to
1673  if ((*m)->GetLocation().IsInt()) {
1674 
1675  // All coordinates of alignannots are given for the master.
1676  // If the end coordinates do not map to valid blocks in the
1677  // slave/child row, there is a problem.
1678  From = (*m)->GetLocation().GetInt().GetFrom();
1679  To = (*m)->GetLocation().GetInt().GetTo();
1680 
1681  // The annotation entry should be confined to a block,
1682  // or if it spans blocks, there can be no unaligned residues
1683  // between the blocks.
1684  if (AllResiduesInRangeAligned(0, From, To)) {
1685  NewFrom = MapPositionToOtherRow(masterSeqAlign, From, MASTER_TO_CHILD);
1686  NewTo = MapPositionToOtherRow(masterSeqAlign, To, MASTER_TO_CHILD);
1687  } else {
1688  NewFrom = INVALID_POSITION;
1689  NewTo = INVALID_POSITION;
1690  }
1691  if ((NewFrom == INVALID_POSITION) || (NewTo == INVALID_POSITION)) {
1692  result = false;
1693  if (err) {
1694  char s[1024];
1695  string d = ((*m)->IsSetDescription()) ? (*m)->GetDescription() : "<unnamed>";
1696  sprintf(s," ==> Annotation '%s' at [%d, %d]\n", d.c_str(), From+1, To+1);
1697  err->append(s);
1698  }
1699  }
1700  }
1701  // if it's a set of from-to's
1702  else if ((*m)->GetLocation().IsPacked_int()) {
1703  // for each from-to
1704  intNumber = 0;
1705  for (n=(*m)->GetLocation().GetPacked_int().Get().begin();
1706  n!=(*m)->GetLocation().GetPacked_int().Get().end(); n++) {
1707  // update from and to with new master
1708  From = (*n)->GetFrom();
1709  To = (*n)->GetTo();
1710  if (AllResiduesInRangeAligned(0, From, To)) {
1711  NewFrom = MapPositionToOtherRow(masterSeqAlign, From, MASTER_TO_CHILD);
1712  NewTo = MapPositionToOtherRow(masterSeqAlign, To, MASTER_TO_CHILD);
1713  } else {
1714  NewFrom = INVALID_POSITION;
1715  NewTo = INVALID_POSITION;
1716  }
1717  if ((NewFrom == INVALID_POSITION) || (NewTo == INVALID_POSITION)) {
1718  result = false;
1719  if (err) {
1720  char s[1024];
1721  string d = ((*m)->IsSetDescription()) ? (*m)->GetDescription() : "<unnamed>";
1722  sprintf(s," ==> Annotation '%s' at segment %d in range [%d, %d]\n",
1723  d.c_str(), intNumber+1, From+1, To+1);
1724  err->append(s);
1725  }
1726  }
1727  ++intNumber;
1728  }
1729  }
1730  }
1731  }
1732  return result;
1733 }
1734 
// NOTE(review): the signature line (listing line 1735) is absent from this
// extract.
1736 //-------------------------------------------------------------------------
1737 // return the number of alignment annotations
// Zero is returned when no align-annot set is present.
1738 //-------------------------------------------------------------------------
1739  if (IsSetAlignannot()) {
1740  return(SetAlignannot().Set().size());
1741  }
1742  return(0);
1743 }
1744 
1745 
// NOTE(review): the signature line (listing line 1746) is absent from this
// extract; the body reads a parameter named Index.
1747 //-------------------------------------------------------------------------
1748 // return the description for the Index alignment annotation
// An empty string is returned when the Index-th annotation (0-based) has no
// description or when Index is out of range.
1749 //-------------------------------------------------------------------------
1750  list< CRef< CAlign_annot > >::iterator i;
1751
1752  int Count=0;
1753  for (i=SetAlignannot().Set().begin(); i!=SetAlignannot().Set().end(); i++) {
1754  if (Count == Index) {
1755  if ((*i)->IsSetDescription()) {
1756  return((*i)->GetDescription());
1757  }
1758  else {
1759  return("");
1760  }
1761  }
1762  Count++;
1763  }
1764  return("");
1765 }
1766 
// NOTE(review): the signature line (listing line 1767) is absent from this
// extract; the body reads parameters named seqId and bioseq.
// Searches the top-level entries of the sequence set (nested sets are not
// descended into) for a Bioseq having a Seq-id that matches seqId and copies
// the first such Bioseq into *bioseq with Assign.  Returns true on a match.
// bioseq is Reset() only when the CD has no sequence set.
1768 {
1769  if (!IsSetSequences() || !GetSequences().IsSet() || !GetSequences().GetSet().IsSetSeq_set()) {
1770  bioseq.Reset();
1771  return false;
1772  }
1773
1774  const CBioseq_set::TSeq_set& seqEntryList = GetSequences().GetSet().GetSeq_set();
1775  CBioseq_set::TSeq_set::const_iterator seListIt = seqEntryList.begin(), seListEnd = seqEntryList.end();
1776  list< CRef< CSeq_id > >::const_iterator lsii;
1777
1778  for (; seListIt != seListEnd; ++seListIt) {
1779  if ((*seListIt)->IsSeq()) {
// NOTE(review): seqIdList is declared by value, so the id list is copied
// for every entry examined.
1780  const list< CRef< CSeq_id > > seqIdList = (*seListIt)->GetSeq().GetId();
1781  for (lsii = seqIdList.begin(); lsii != seqIdList.end(); ++lsii) {
1782  if (seqId->Match(**lsii)) {
// NOTE(review): bioseq is dereferenced here without a check that it is
// non-empty -- confirm callers pass an allocated CRef.
1783  bioseq->Assign((*seListIt)->GetSeq());
1784  return true;
1785  }
1786  }
1787  }
1788  }
1789  return false;
1790 }
1791 
1792 // Recursively look for a bioseq with the given seqid; return the first instance found.
1793 bool CCdCore::GetBioseqWithSeqId(const CRef< CSeq_id>& seqId, const CBioseq*& bioseq) const
1794 {
1795  if (!IsSetSequences() || !GetSequences().IsSet() || !GetSequences().GetSet().IsSetSeq_set()) {
1796  return false;
1797  }
1798 
1799  const CBioseq_set::TSeq_set& seqEntryList = GetSequences().GetSet().GetSeq_set();
1800  return GetBioseqWithSeqid(seqId, seqEntryList, bioseq);
1801 }
1802 
1803 // Recursively look for a bioseq with the given seqid in seqEntryList; return the first instance found.
1804 bool CCdCore::GetBioseqWithSeqid(const CRef< CSeq_id>& seqid, const list< CRef< CSeq_entry > >& seqEntryList, const CBioseq*& bioseq) {
1805 
1806  bool result = false;
1807 
1808  list< CRef< CSeq_entry > >::const_iterator lsei;
1809 
1810 // const list< CRef< CSeq_id > > seqIdList;
1811  list< CRef< CSeq_id > >::const_iterator lsii;
1812 
1813  for (lsei = seqEntryList.begin(); lsei != seqEntryList.end(); ++lsei) {
1814  if ((*lsei)->IsSet()) {
1815  result = GetBioseqWithSeqid(seqid, (*lsei)->GetSet().GetSeq_set(), bioseq); // RECURSIVE!!
1816  if (result) {
1817  return result;
1818  }
1819  } else if ((*lsei)->IsSeq()) {
1820  const list< CRef< CSeq_id > > seqIdList = (*lsei)->GetSeq().GetId();
1821  for (lsii = seqIdList.begin(); lsii != seqIdList.end(); ++lsii) {
1822  if (seqid->Match(**lsii)) {
1823  bioseq = &(*lsei)->GetSeq();
1824  return true;
1825  }
1826  }
1827  }
1828 
1829  }
1830 
1831  return false;
1832 }
1833 
1834 /* ============================== */
1835 /* Parent CD identifier methods */
1836 /* ============================== */
1837 
// NOTE(review): the signature line (listing line 1838) is absent from this
// extract; the body reads a parameter named parentType that is compared with
// eClassicalParent and eComponentParent.
// For eClassicalParent the answer is whether the CD has a classical parent.
// For eComponentParent the answer can be true only when there is no
// classical parent and the parent-type constraints hold.
1839  bool result = false;
1840  bool hasClassicalParent = HasParentType(CDomain_parent::eParent_type_classical);
1841
1842  if (parentType == eClassicalParent) {
1843  result = hasClassicalParent;
1844  } else if (parentType == eComponentParent && !hasClassicalParent) {
1845
1846  // Once know constraints are satisfied and there are no classical parents,
1847  // make sure every ancestor is not of type eParent_type_other.
// NOTE(review): the loop below stops with result == true at the FIRST
// ancestor whose type is not eParent_type_other; it does not verify every
// ancestor, unlike what the comment above says -- confirm the intent.
1848  if (obeysParentTypeConstraints(this)) {
1849  if (IsSetAncestors()) {
1850  list< CRef< CDomain_parent > >::const_iterator pit, pit_end = GetAncestors().end();
1851  for (pit = GetAncestors().begin(); pit != pit_end && !result; ++pit) {
1852  if ((*pit)->GetParent_type() != CDomain_parent::eParent_type_other) {
1853  result = true;
1854  }
1855  }
1856  }
1857  }
1858  }
1859  return result;
1860 }
1861 
// NOTE(review): the signature line (listing line 1862) is absent from this
// extract; the body compares a parameter named parentType with
// GetParent_type() of each ancestor.
// Returns false when the parent-type constraints are violated or when neither
// 'ancestors' nor 'parent' is set.  With 'ancestors' set, returns true when
// any ancestor has the requested parent type.
1863  bool result = obeysParentTypeConstraints(this);
1864
1865  // Once know constraints are satisfied, just look for the type.
1866  if (result) {
1867  // 'ancestors' field set
1868  if (IsSetAncestors()) {
1869  list< CRef< CDomain_parent > >::const_iterator pit, pit_end = GetAncestors().end();
1870  result=false;
1871  for (pit = GetAncestors().begin(); (pit != pit_end) && !result; ++pit) {
1872  if ((*pit)->GetParent_type() == parentType) {
1873  result = true;
1874  }
1875  }
1876  // 'parent' field set
// NOTE(review): the statement for this branch (listing line 1878) is absent
// from this extract, so how 'result' is set when only 'parent' is present
// cannot be read from here.
1877  } else if (IsSetParent()) {
1879  // neither 'ancestors' nor 'parent' set
1880  } else {
1881  result = false;
1882  }
1883  }
1884  return result;
1885 }
1886 
1887 bool CCdCore::GetClassicalParentId(const CCdd_id*& parentId) const {
1889  if (result) {
1890  if (IsSetAncestors()) {
1891  parentId = &(*(GetAncestors().begin()))->GetParentid();
1892  } else {
1893  parentId = &GetParent();
1894  }
1895  }
1896  return result;
1897 }
1898 
// NOTE(review): the signature line (listing line 1899) is absent from this
// extract.
// Convenience form: returns the classical parent's accession and discards the
// version that the two-result overload writes into Dummy.
1900  int Dummy;
1901  return(GetClassicalParentAccession(Dummy));
1902 }
1903 
1904 string CCdCore::GetClassicalParentAccession(int& Version) const{
1905 //-------------------------------------------------------------------------
1906 // get accession name and version of parent
1907 //-------------------------------------------------------------------------
1908  string Str;
1909  const CCdd_id* parentId;
1910 
1911  if (GetClassicalParentId(parentId)) {
1912  Str = parentId->GetGid().GetAccession();
1913 
1914  if (parentId->IsGid()) {
1915  if (parentId->GetGid().IsSetVersion()) {
1916  Version = parentId->GetGid().GetVersion();
1917  }
1918  else {
1919  Version = 1;
1920  }
1921  }
1922  }
1923  return(Str);
1924 }
1925 
1926 bool CCdCore::AddComment(const string& comment)
1927 {
1928  bool result = (comment.length() > 0);
1929 
1930  // Don't add an identical comment.
1931  if (result && IsSetDescription()) {
1932  for (TDescription::Tdata::const_iterator cit = GetDescription().Get().begin(); result && cit != GetDescription().Get().end(); ++cit) {
1933  if ((*cit)->IsComment() && (*cit)->GetComment() == comment) {
1934  result = false;
1935  }
1936  }
1937  }
1938 
1939  if (result) {
1940  CRef<CCdd_descr> descr(new CCdd_descr);
1941  descr->SetComment(comment);
1942  result = AddCddDescr(descr);
1943  }
1944  return result;
1945 }
1946 
1947 bool CCdCore::AddOthername(const string& othername)
1948 {
1949  bool result = (othername.length() > 0);
1950 
1951  // Don't add an identical othername.
1952  if (result && IsSetDescription()) {
1953  for (TDescription::Tdata::const_iterator cit = GetDescription().Get().begin(); result && cit != GetDescription().Get().end(); ++cit) {
1954  if ((*cit)->IsOthername() && (*cit)->GetOthername() == othername) {
1955  result = false;
1956  }
1957  }
1958  }
1959 
1960  if (result) {
1961  CRef<CCdd_descr> descr(new CCdd_descr);
1962  descr->SetOthername(othername);
1963  result = AddCddDescr(descr);
1964  }
1965  return result;
1966 }
1967 
1968 bool CCdCore::AddTitle(const string& title)
1969 {
1970  bool result = (title.length() > 0);
1971 
1972  // Don't add an identical title.
1973  if (result && IsSetDescription()) {
1974  for (TDescription::Tdata::const_iterator cit = GetDescription().Get().begin(); result && cit != GetDescription().Get().end(); ++cit) {
1975  if ((*cit)->IsTitle() && (*cit)->GetTitle() == title) {
1976  result = false;
1977  }
1978  }
1979  }
1980 
1981  if (result) {
1982  CRef<CCdd_descr> descr(new CCdd_descr);
1983  descr->SetTitle(title);
1984  result = AddCddDescr(descr);
1985  }
1986  return result;
1987 }
1988 
1989 string CCdCore::GetTitle() const
1990 {
1991  string result = kEmptyStr;
1992 
1993  if (IsSetDescription()) {
1994  TDescription::Tdata::const_iterator cit = GetDescription().Get().begin();
1995  TDescription::Tdata::const_iterator cend = GetDescription().Get().end();
1996  while (cit != cend) {
1997  if ((*cit)->IsTitle()) {
1998  result = (*cit)->GetTitle();
1999  break;
2000  }
2001  ++cit;
2002  }
2003  }
2004 
2005  return result;
2006 }
2007 
2008 unsigned int CCdCore::GetTitles(vector<string>& titles) const
2009 {
2010  string result = kEmptyStr;
2011 
2012  titles.clear();
2013  if (IsSetDescription()) {
2014  TDescription::Tdata::const_iterator cit = GetDescription().Get().begin();
2015  TDescription::Tdata::const_iterator cend = GetDescription().Get().end();
2016  while (cit != cend) {
2017  if ((*cit)->IsTitle()) {
2018  result = (*cit)->GetTitle();
2019  titles.push_back(result);
2020  }
2021  ++cit;
2022  }
2023  }
2024 
2025  return titles.size();
2026 }
2027 
// NOTE(review): the signature line (listing line 2028) is absent from this
// extract; the body reads a parameter named pmid.
// Adds a reference descriptor holding the given PMID.  Returns false without
// adding anything when a reference descriptor with the same PMID already
// exists; otherwise returns the result of AddCddDescr().
2029 {
2030  // Don't add a duplicate PMID.
2031  if (IsSetDescription()) {
2032  for (TDescription::Tdata::const_iterator cit = GetDescription().Get().begin(); cit != GetDescription().Get().end(); ++cit) {
2033  if ((*cit)->IsReference() && (*cit)->GetReference().IsPmid()) {
2034  if (pmid == (*cit)->GetReference().GetPmid()) {
2035  return false;
2036  }
2037  }
2038  }
2039  }
2040
2041  // validate the pmid???
2042  CRef<CPub> pub(new CPub);
2043  pub->SetPmid(CPub::TPmid(pmid));
2044
2045  CRef<CCdd_descr> descr(new CCdd_descr);
2046  descr->SetReference(*pub);
2047  return AddCddDescr(descr);
2048 }
2049 
2050 bool CCdCore::AddSource(const string& source, bool removeExisting)
2051 {
2052  bool result = (source.length() > 0);
2053 
2054  if (result) {
2055  if (removeExisting)
2057 
2058  CRef<CCdd_descr> descr(new CCdd_descr);
2059  descr->SetSource(source);
2060  result = AddCddDescr(descr);
2061  }
2062  return result;
2063 }
2064 
// NOTE(review): the signature line (listing line 2065) is absent from this
// extract.
// Forwards to SetCreationDate() for this object and returns its result.
2066 {
2067  return SetCreationDate(this);
2068 }
2069 
// NOTE(review): the signature line (listing line 2070) is absent from this
// extract; the body reads a parameter named descr.
// Appends descr to the CD's description set, creating the set first when it
// is not yet present.  Returns false when descr is empty.
2071 {
2072  if (!IsSetDescription()) {
2073  CCdd_descr_set* newDescrSet = new CCdd_descr_set();
// NOTE(review): a plain 'new' expression does not yield a null pointer, so
// the else branch below looks unreachable -- confirm before relying on it.
2074  if (newDescrSet)
2075  SetDescription(*newDescrSet);
2076  else
2077  return false;
2078  }
2079
2080  if (descr.NotEmpty()) {
2081  SetDescription().Set().push_back(descr);
2082  return true;
2083  }
2084  return false;
2085 }
2086 
2087 bool CCdCore::RemoveCddDescrsOfType(int cddDescrChoice)
2088 {
2089  if (cddDescrChoice <= CCdd_descr::e_not_set || cddDescrChoice >= CCdd_descr::e_MaxChoice) return false;
2090 
2091  unsigned int count = 0;
2092  bool reachedEnd = false;
2093  CCdd_descr_set::Tdata::iterator i, iEnd;
2094  if (IsSetDescription()) {
2095  while (!reachedEnd) {
2096  i = SetDescription().Set().begin();
2097  iEnd = SetDescription().Set().end();
2098  for (; i != iEnd; i++) {
2099  if ((*i)->Which() == cddDescrChoice) {
2100  ++count;
2101  SetDescription().Set().erase(i);
2102  break;
2103  }
2104  }
2105  reachedEnd = (i == iEnd);
2106  }
2107  }
2108  return (count > 0);
2109 }
2110 
2111 END_SCOPE(cd_utils)
User-defined methods of the data storage class.
bool GetCDBlockLengths(vector< int > &lengths) const
Definition: cuCdCore.cpp:291
int MapPositionToOtherRow(int thisRow, int thisPos, int otherRow) const
Definition: cuCdCore.cpp:1467
bool GetBioseqForIndex(int seqIndex, CRef< CBioseq > &bioseq)
Definition: cuCdCore.cpp:702
void EraseSequences()
Definition: cuCdCore.cpp:1032
void Clear()
Definition: cuCdCore.cpp:1160
int GetNumRows() const
Definition: cuCdCore.cpp:215
bool EraseTheseRows(const std::vector< int > &TossRows)
Definition: cuCdCore.cpp:958
bool IsSeqAligns() const
Definition: cuCdCore.cpp:1383
bool AllResiduesInRangeAligned(int rowId, int from, int to) const
Definition: cuCdCore.cpp:1617
int GetNumRowsWithSequences() const
Definition: cuCdCore.cpp:231
void EraseSequence(int SeqIndex)
Definition: cuCdCore.cpp:1077
CCdCore(void)
Definition: cuCdCore.cpp:68
int GetPSSMLength() const
Definition: cuCdCore.cpp:271
bool Has3DMaster() const
Definition: cuCdCore.cpp:1197
bool HasSeqId(const CRef< CSeq_id > &ID) const
Definition: cuCdCore.cpp:738
const CRef< CSeq_annot > & GetAlignment() const
Definition: cuCdCore.cpp:1353
bool GetClassicalParentId(const CCdd_id *&parentId) const
Definition: cuCdCore.cpp:1887
int GetUpperBound(int Row) const
Definition: cuCdCore.cpp:490
bool AddTitle(const string &title)
Definition: cuCdCore.cpp:1968
string GetSequenceStringByRow(int rowId)
Definition: cuCdCore.cpp:578
bool AddSequence(CRef< CSeq_entry > seqAntry)
Definition: cuCdCore.cpp:1153
bool GetSeqAlign(int Row, CRef< CSeq_align > &seqAlign)
Definition: cuCdCore.cpp:1419
int GetRowsWithConsensus(vector< int > &consensusRows) const
Definition: cuCdCore.cpp:1572
bool GetSeqIDFromAlignment(int RowIndex, CRef< CSeq_id > &SeqID) const
Definition: cuCdCore.cpp:815
string GetTitle() const
Definition: cuCdCore.cpp:1989
bool AddPendingSeqAlign(CRef< CSeq_align > seqAlign)
Definition: cuCdCore.cpp:1112
bool HasCddId(const CCdd_id &id) const
Definition: cuCdCore.cpp:161
bool FindConsensusInSequenceList(vector< int > *indices=NULL) const
Definition: cuCdCore.cpp:1536
TGi GetGIFromSequenceList(int SeqIndex) const
Definition: cuCdCore.cpp:588
string GetAccession() const
Definition: cuCdCore.cpp:76
bool Get_GI_or_PDB_String_FromAlignment(int RowIndex, std::string &Str, bool Pad, int Len) const
Definition: cuCdCore.cpp:510
string GetUpdateDate()
Definition: cuCdCore.cpp:196
int Num3DAlignments() const
Definition: cuCdCore.cpp:1227
string GetSequenceStringByIndex(int SeqIndex)
Definition: cuCdCore.cpp:724
void ErasePendingRow(int row)
Definition: cuCdCore.cpp:1136
bool AddComment(const string &comment)
Definition: cuCdCore.cpp:1926
int GetMasterSeqIndex() const
Definition: cuCdCore.cpp:333
string GetSpeciesForRow(int Row)
Definition: cuCdCore.cpp:568
bool AddCreateDate()
Definition: cuCdCore.cpp:2065
bool GetMmdbId(int SeqIndex, int &id) const
Definition: cuCdCore.cpp:1314
string GetLongDescription()
Definition: cuCdCore.cpp:179
bool AddSeqAlign(CRef< CSeq_align > seqAlign)
Definition: cuCdCore.cpp:1104
int GetNumAlignmentAnnotations()
Definition: cuCdCore.cpp:1735
bool AddCddDescr(CRef< CCdd_descr > &descr)
Definition: cuCdCore.cpp:2070
void EraseUID()
Definition: cuCdCore.cpp:132
unsigned int GetTitles(vector< string > &titles) const
Definition: cuCdCore.cpp:2008
bool GetBlockStartsForRow(int rowIndex, vector< int > &starts) const
Definition: cuCdCore.cpp:301
bool GetSeqEntryForRow(int rowId, CRef< CSeq_entry > &seqEntry) const
Definition: cuCdCore.cpp:529
bool CopyBioseqForSeqId(const CRef< CSeq_id > &seqId, CRef< CBioseq > &bioseq) const
Definition: cuCdCore.cpp:1767
void SetComment(CCdd_descr::TComment oldComment, CCdd_descr::TComment newComment)
Definition: cuCdCore.cpp:1592
bool GetSeqIDForIndex(int SeqIndex, CRef< CSeq_id > &SeqID) const
Definition: cuCdCore.cpp:829
bool GetBioseqWithSeqId(const CRef< CSeq_id > &seqid, const CBioseq *&bioseq) const
Definition: cuCdCore.cpp:1793
bool EraseOtherRows(const std::vector< int > &KeepRows)
Definition: cuCdCore.cpp:994
bool HasParentType(EClassicalOrComponent parentType) const
Definition: cuCdCore.cpp:1838
int GetSeqIndexForRowIndex(int rowIndex) const
Definition: cuCdCore.cpp:319
int GetAllRowIndicesForSeqId(const CRef< CSeq_id > &SeqID, list< int > &rows) const
Definition: cuCdCore.cpp:406
bool AlignAnnotsValid(string *err=NULL) const
Definition: cuCdCore.cpp:1651
bool AddPmidReference(TEntrezId pmid)
Definition: cuCdCore.cpp:2028
void ErasePendingRows(set< int > &rows)
Definition: cuCdCore.cpp:1127
int GetAlignmentLength() const
Definition: cuCdCore.cpp:257
int GetSeqIndex(const CRef< CSeq_id > &SeqID) const
Definition: cuCdCore.cpp:337
int GetNthMatchFor(CRef< CSeq_id > &ID, int N)
Definition: cuCdCore.cpp:365
bool AddOthername(const string &othername)
Definition: cuCdCore.cpp:1947
int GetLowerBound(int Row) const
Definition: cuCdCore.cpp:471
void SetAccession(string Accession, int Version)
Definition: cuCdCore.cpp:106
static bool GetBioseqWithSeqid(const CRef< CSeq_id > &seqid, const list< CRef< CSeq_entry > > &bsset, const CBioseq *&bioseq)
Definition: cuCdCore.cpp:1804
const list< CRef< CSeq_align > > & GetSeqAligns() const
Definition: cuCdCore.cpp:1398
int GetNumBlocks() const
Definition: cuCdCore.cpp:281
bool UsesConsensusSequenceAsMaster() const
Definition: cuCdCore.cpp:1558
bool GetGI(int Row, TGi &GI, bool ignorePDBs=true)
Definition: cuCdCore.cpp:433
bool RemoveCddDescrsOfType(int cddDescrChoice)
Definition: cuCdCore.cpp:2087
bool GetBioseqForRow(int rowId, CRef< CBioseq > &bioseq)
Definition: cuCdCore.cpp:561
string GetClassicalParentAccession() const
Definition: cuCdCore.cpp:1899
string GetSpeciesForIndex(int SeqIndex)
Definition: cuCdCore.cpp:670
bool HasConsensusSequence() const
Definition: cuCdCore.cpp:1509
bool GetRowsForMmdbId(int mmdbId, list< int > &rows) const
Definition: cuCdCore.cpp:1268
bool GetSeqIDs(int SeqIndex, list< CRef< CSeq_id > > &SeqIDs)
Definition: cuCdCore.cpp:900
bool GetRowsWithMmdbId(vector< int > &rows) const
Definition: cuCdCore.cpp:1293
int GetNumSequences() const
Definition: cuCdCore.cpp:244
bool AddSource(const string &source, bool removeExistingSources=true)
Definition: cuCdCore.cpp:2050
bool GetSeqIDForRow(int Pair, int DenDiagRow, CRef< CSeq_id > &SeqID) const
Definition: cuCdCore.cpp:787
bool GetSeqEntryForIndex(int seqIndex, CRef< CSeq_entry > &seqEntry) const
Definition: cuCdCore.cpp:679
bool GetPDB(int Row, const CPDB_seq_id *&pPDB)
Definition: cuCdCore.cpp:454
string GetDefline(int SeqIndex) const
Definition: cuCdCore.cpp:625
string GetAlignmentAnnotationDescription(int Index)
Definition: cuCdCore.cpp:1746
bool SynchronizeMaster3D(bool checkRow1WhenConsensusMaster=true)
Definition: cuCdCore.cpp:1170
int GetUID() const
Definition: cuCdCore.cpp:146
CCdd_descr_set –.
CCdd_descr –.
Definition: Cdd_descr.hpp:66
CCdd_id –.
Definition: Cdd_id.hpp:66
CGlobal_id –.
Definition: Global_id.hpp:66
Definition: Pub.hpp:56
CRef –.
Definition: ncbiobj.hpp:618
Definition: Seq_entry.hpp:56
CUpdate_align –.
CUpdate_comment –.
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
size_type size() const
Definition: set.hpp:132
#define Len
int MapPositionToMaster(int childPos, const CSeq_align &align)
Definition: cuAlign.cpp:116
CSeq_align::C_Segs::TDendiag TDendiag
Definition: cuAlign.hpp:48
int GetBlockCount(const CRef< CSeq_align > &seqAlign)
Definition: cuAlign.cpp:378
int GetBlockNumberForResidue(int residue, const CRef< CSeq_align > &seqAlign, bool onMaster, vector< int > *starts=NULL, vector< int > *lengths=NULL)
Definition: cuAlign.cpp:350
bool GetFirstOrLastDenDiag(const CRef< CSeq_align > &seqAlign, bool firstOrLast, CRef< CDense_diag > &dd)
Definition: cuAlign.cpp:457
bool EraseRow(CRef< CSeq_annot > &seqAnnot, int row)
Definition: cuAlign.cpp:658
int GetBlockLengths(const CRef< CSeq_align > &seqAlign, vector< int > &lengths)
Definition: cuAlign.cpp:391
int MapPositionToChild(int masterPos, const CSeq_align &align)
Definition: cuAlign.cpp:122
int GetBlockStarts(const CRef< CSeq_align > &seqAlign, vector< int > &starts, bool onMaster)
Definition: cuAlign.cpp:418
int GetNumAlignedResidues(const CRef< CSeq_align > &align)
Definition: cuAlign.cpp:238
bool SetCreationDate(CCdCore *cd)
Definition: cuCD.cpp:207
bool obeysParentTypeConstraints(const CCdCore *pCD)
Definition: cuCD.cpp:701
USING_SCOPE(objects)
const CRef< CSeq_align > EMPTY_CREF_SEQALIGN
Definition: cuCdCore.cpp:59
const CRef< CSeq_annot > EMPTY_CREF_SEQANNOT
Definition: cuCdCore.cpp:60
int intSortRowsFunction(void *pVal, int i, int j)
Definition: cuCdCore.cpp:949
EClassicalOrComponent
Definition: cuCdCore.hpp:48
@ eClassicalParent
Definition: cuCdCore.hpp:49
@ eComponentParent
Definition: cuCdCore.hpp:50
const int INVALID_POSITION
CoordMapDir
@ MASTER_TO_CHILD
@ CHILD_TO_MASTER
bool SeqIdHasMatchInBioseq(const CRef< CSeq_id > &id, const CBioseq &bioseq)
Definition: cuSequence.cpp:80
bool SeqIdsMatch(const CRef< CSeq_id > &id1, const CRef< CSeq_id > &id2)
Definition: cuSequence.cpp:70
string GetSpeciesFromBioseq(const CBioseq &bioseq)
Definition: cuSequence.cpp:190
bool GetNcbieaaString(const CBioseq &bioseq, string &str)
Definition: cuSequence.cpp:298
int GetMMDBId(const CBioseq &bioseq)
Definition: cuSequence.cpp:112
bool IsConsensus(const CRef< CSeq_id > &seqId)
Definition: cuSequence.cpp:405
void algSortQuickCallbackIndex(void *pVal, int n, int *istack, int *ind, algSORTFunction isCondFunc)
Definition: cuSort.cpp:19
string Make_SeqID_String(const CRef< CSeq_id > SeqID, bool Pad, int Len)
Definition: cuUtils.cpp:106
static char tmp[3200]
Definition: utf8.c:42
#define INVALID_GI
Definition: ncbimisc.hpp:1089
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define kEmptyStr
Definition: ncbistr.hpp:123
const TSequences & GetSequences(void) const
Get the Sequences member data.
Definition: Cdd_.hpp:1268
bool IsGid(void) const
Check if variant Gid is selected.
Definition: Cdd_id_.hpp:287
bool IsSetSequences(void) const
Store as bioseq-set inside seq-entry. Check if a value has been assigned to Sequences data member.
Definition: Cdd_.hpp:1256
const Tdata & Get(void) const
Get the member data.
const TId & GetId(void) const
Get the Id member data.
Definition: Cdd_.hpp:1171
const TAccession & GetAccession(void) const
Get the Accession member data.
Definition: Global_id_.hpp:306
bool IsSetSeqannot(void) const
Contains the CD alignment. Check if a value has been assigned to Seqannot data member.
Definition: Cdd_.hpp:1210
TId & SetId(void)
Assign a value to Id data member.
Definition: Cdd_.hpp:1180
bool IsSetVersion(void) const
version 0 is the seed, version numbers increase with update/curate cycles Check if a value has been a...
Definition: Global_id_.hpp:388
Tdata & Set(void)
Assign a value to data member.
void ResetMaster3d(void)
Reset Master3d data member.
Definition: Cdd_.cpp:311
bool IsSetDescription(void) const
status, references, etc.
Definition: Cdd_.hpp:1189
TMaster3d & SetMaster3d(void)
Assign a value to Master3d data member.
Definition: Cdd_.hpp:1534
const TGid & GetGid(void) const
Get the variant data.
Definition: Cdd_id_.cpp:100
bool IsSetAlignannot(void) const
Alignment annotation. Check if a value has been assigned to Alignannot data member.
Definition: Cdd_.hpp:1541
const Tdata & Get(void) const
Get the member data.
const Tdata & Get(void) const
Get the member data.
const TAncestors & GetAncestors(void) const
Get the Ancestors member data.
Definition: Cdd_.hpp:1616
TPending & SetPending(void)
Assign a value to Pending data member.
Definition: Cdd_.hpp:1484
TVersion GetVersion(void) const
Get the Version member data.
Definition: Global_id_.hpp:407
TGid & SetGid(void)
Select the variant.
Definition: Cdd_id_.cpp:106
TSequences & SetSequences(void)
Assign a value to Sequences data member.
Definition: Cdd_.cpp:139
const TPending & GetPending(void) const
Get the Pending member data.
Definition: Cdd_.hpp:1478
TAlignannot & SetAlignannot(void)
Assign a value to Alignannot data member.
Definition: Cdd_.cpp:327
bool IsSetAncestors(void) const
List of parents. Check if a value has been assigned to Ancestors data member.
Definition: Cdd_.hpp:1604
Tdata & Set(void)
Assign a value to data member.
list< CRef< CCdd_id > > Tdata
Definition: Cdd_id_set_.hpp:89
const TAlignannot & GetAlignannot(void) const
Get the Alignannot member data.
Definition: Cdd_.hpp:1553
const TSeqannot & GetSeqannot(void) const
Get the Seqannot member data.
Definition: Cdd_.hpp:1222
TSeqannot & SetSeqannot(void)
Assign a value to Seqannot data member.
Definition: Cdd_.hpp:1228
Tdata & Set(void)
Assign a value to data member.
const TDescription & GetDescription(void) const
Get the Description member data.
Definition: Cdd_.hpp:1201
const TParent & GetParent(void) const
Get the Parent member data.
Definition: Cdd_.hpp:1394
bool IsSetParent(void) const
This CD is the result of a split. Check if a value has been assigned to Parent data member.
Definition: Cdd_.hpp:1382
TDescription & SetDescription(void)
Assign a value to Description data member.
Definition: Cdd_.cpp:99
@ e_Source
the database the seeds were created from, e.g. SMART, PFAM, etc..
Definition: Cdd_descr_.hpp:153
@ eParent_type_classical
the classification of parent child relations
@ e_MaxChoice
== e_Title+1
Definition: Cdd_descr_.hpp:168
TPmid & SetPmid(void)
Select the variant.
Definition: Pub_.hpp:690
vector< CRef< CSeq_id > > TIds
Definition: Dense_diag_.hpp:93
const TIds & GetIds(void) const
Get the Ids member data.
bool IsDendiag(void) const
Check if variant Dendiag is selected.
Definition: Seq_align_.hpp:720
const TDendiag & GetDendiag(void) const
Get the variant data.
Definition: Seq_align_.hpp:726
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
const TPdb & GetPdb(void) const
Get the variant data.
Definition: Seq_id_.cpp:435
bool IsPdb(void) const
Check if variant Pdb is selected.
Definition: Seq_id_.hpp:922
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
list< CRef< CSeq_entry > > TSeq_set
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
bool IsAlign(void) const
Check if variant Align is selected.
Definition: Seq_annot_.hpp:635
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
const TAlign & GetAlign(void) const
Get the variant data.
Definition: Seq_annot_.hpp:641
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
yy_size_t n
const TYPE & Get(const CNamedParameterList *param)
constexpr auto sort(_Init &&init)
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
Definition: pointer.h:1149
#define row(bind, expected)
Definition: string_bind.c:73
else result
Definition: token2.c:20
#define const
Definition: zconf.h:232
#define N
Definition: crc32.c:57
Modified on Wed Apr 17 13:09:11 2024 by modify_doxy.py rev. 669887