NCBI C++ ToolKit
validerror_bioseqset.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: validerror_bioseqset.cpp 75762 2016-12-13 12:42:48Z bollin $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko......
27  *
28  * File Description:
29  * validation of bioseq_set
30  * .......
31  *
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
37 
38 #include <objmgr/util/sequence.hpp>
39 
40 #include <serial/enumvalues.hpp>
41 #include <serial/iterator.hpp>
42 
45 
46 #include <objects/seq/Bioseq.hpp>
47 #include <objects/seq/MolInfo.hpp>
48 
53 
55 
56 #include <objmgr/bioseq_ci.hpp>
57 #include <objmgr/bioseq_handle.hpp>
58 #include <objmgr/seqdesc_ci.hpp>
59 #include <objmgr/feat_ci.hpp>
60 #include <objmgr/seq_annot_ci.hpp>
61 #include <objmgr/seq_entry_ci.hpp>
62 
64 
65 
69 using namespace sequence;
70 
71 
72 // =============================================================================
73 // Public
74 // =============================================================================
75 
76 
78  CValidError_base(imp) , m_AnnotValidator(imp) , m_DescrValidator(imp) , m_BioseqValidator(imp)
79 {
 // Constructor: the signature line is absent from this listing; only the
 // member-initializer list above survives. It forwards `imp` to
 // CValidError_base and to the annot, descr and bioseq sub-validators.
 // The body itself does nothing.
80 }
81 
82 
84 {
 // NOTE(review): the signature line is missing from this listing. By position
 // (directly after the constructor) this is presumably the destructor
 // ~CValidError_bioseqset - confirm. The body is empty.
85 }
86 
87 
89  const CBioseq_set& seqset)
90 {
 // Entry point for validating one Bioseq-set.
 //
 // Steps visible in this listing:
 //   1. walk the direct members, recursing into nested sets;
 //   2. count protein / nucleotide / segmented Bioseqs through `seqit`;
 //   3. dispatch on the set class to the class-specific validator;
 //   4. run the class-independent checks (MolInfo, title, elements, DBLink);
 //   5. validate the annotations and, when set, the descriptors.
 //
 // NOTE(review): this listing has lost several lines (listing numbers jump),
 // including the heads of most PostErr calls, every `case` label of the
 // switch and the declarations of `seqit`, `bsh` and `ctx`. Comments below
 // describe only what is still visible.
91  int protcnt = 0;
92  int nuccnt = 0;
93  int segcnt = 0;
94 
95  // Validate Set Contents
96  FOR_EACH_SEQENTRY_ON_SEQSET (se_list_it, seqset) {
97  const CSeq_entry& se = **se_list_it;
98  if ( se.IsSet() ) {
99  const CBioseq_set& set = se.GetSet();
100 
101  // look for internal genbank sets
102  if ( set.IsSetClass()
103  && set.GetClass() == CBioseq_set::eClass_genbank ) {
104 
 // The report is attached to the enclosing set (seqset), not the nested one.
106  "Bioseq-set contains internal GenBank Bioseq-set",
107  seqset);
108  }
109 
110  // validate member set
 // Recursion: nested sets get the full set validation as well.
111  ValidateBioseqSet (set);
112  } else if (se.IsSeq()) {
113  const CBioseq& seq = se.GetSeq();
114 
115  // Validate Member Seq
 // NOTE(review): the statement that uses `seq` is missing from this listing;
 // presumably it calls m_BioseqValidator.ValidateBioseq(seq) - confirm.
117  }
118  }
119 
120  // note - need to do this with an iterator, so that we count sequences in subsets
 // `seqit` is declared on a line missing from this listing.
122  for (; seqit; ++seqit) {
123 
124  if ( seqit->IsAa() ) {
125  protcnt++;
126  } else if ( seqit->IsNa() ) {
127  nuccnt++;
128  }
129 
 // Counted independently of the protein/nucleotide tally above.
130  if (seqit->GetInst().GetRepr() == CSeq_inst::eRepr_seg) {
131  segcnt++;
132  }
133  }
134 
 // Class-specific validation. The `case` labels are missing from this
 // listing; each surviving call names the validator that was selected.
135  switch ( seqset.GetClass() ) {
138  "Bioseq_set class not set", seqset);
139  break;
141  ValidateNucProtSet(seqset, nuccnt, protcnt, segcnt);
142  break;
143 
145  ValidateSegSet(seqset, segcnt);
146  break;
147 
149  ValidatePartsSet(seqset);
150  break;
151 
153  ValidateGenbankSet(seqset);
154  break;
155 
157  ValidatePopSet(seqset);
158  break;
159 
165  ValidatePhyMutEcoWgsSet(seqset);
166  break;
167 
169  ValidateGenProdSet(seqset);
170  break;
 // This class is reported only when the record is not RefSeq.
172  if (!m_Imp.IsRefSeq()) {
174  "Set class should not be conset", seqset);
175  }
176  break;
177  /*
178  case CBioseq_set::eClass_other:
179  PostErr(eDiag_Critical, eErr_SEQ_PKG_GenomicProductPackagingProblem,
180  "Genomic product set class incorrectly set to other", seqset);
181  break;
182  */
183  default:
 // Any other class: only complain when the set holds no Bioseqs at all.
184  if ( nuccnt == 0 && protcnt == 0 ) {
186  "No Bioseqs in this set", seqset);
187  }
188  break;
189  }
190 
 // Checks that run for every set; each callee decides whether the class
 // of seqset is relevant to it.
191  SetShouldNotHaveMolInfo(seqset);
192  ValidateSetTitle(seqset);
193  ValidateSetElements(seqset);
194 
 // NOTE(review): the rest of this condition and the guarded statement are
 // missing from this listing.
195  if (seqset.IsSetClass()
196  && (seqset.GetClass() == CBioseq_set::eClass_pop_set
203  }
204 
 // For the listed classes (only eClass_genbank is visible here) a DBLink
 // user object on the set is reported.
205  if (seqset.IsSetClass()
206  && (seqset.GetClass() == CBioseq_set::eClass_genbank
213  ShouldHaveNoDblink(seqset);
214  }
215 
216  // validate annots
217  FOR_EACH_SEQANNOT_ON_SEQSET (annot_it, seqset) {
218  m_AnnotValidator.ValidateSeqAnnot (**annot_it);
219  m_AnnotValidator.ValidateSeqAnnotContext (**annot_it, seqset);
220  }
 // Descriptor validation. `bsh` and `ctx` are obtained on lines missing from
 // this listing, and the innermost call is missing too; presumably it is
 // m_DescrValidator.ValidateSeqDescr(...) - confirm.
221  if (seqset.IsSetDescr()) {
223  if (bsh) {
225  if (ctx) {
227  }
228  }
229  }
230 }
231 
232 
233 // =============================================================================
234 // Private
235 // =============================================================================
236 
237 
239 {
 // Reports whether `seq` is the product of a feature, as required inside a
 // genomic product set (GPS).
 //
 // Returns true unconditionally when m_Imp.IsGPS() is false. Otherwise builds
 // a by-product feature iterator on the Bioseq handle of `seq` and returns
 // whether it found anything.
 //
 // NOTE(review): the selector line (listing line 243) is missing; presumably
 // it restricts the search to mRNA features - confirm.
240  if ( m_Imp.IsGPS() ) {
241  CFeat_CI mrna(
242  m_Scope->GetBioseqHandle(seq),
244  .SetByProduct());
 // The iterator converts to true when it points at a feature.
245  return (bool)mrna;
246  }
247  return true;
248 }
249 
250 
252 {
 // Returns true when a feature found by product on `prot` is located on the
 // first member of the genomic product set `gps` (the contig); returns false
 // otherwise, including when gps has no members or its first member is not a
 // Bioseq.
 //
 // NOTE(review): the declarations of `prot` (listing line 257) and `cds_seq`
 // (listing line 262) are missing from this listing.
253  // there should be a coding region on the contig whose product is seq
254  if (gps.IsSetSeq_set() && gps.GetSeq_set().size() > 0
255  && gps.GetSeq_set().front()->IsSeq()) {
 // The contig is taken to be the first entry of the set.
256  CBioseq_Handle contig = m_Scope->GetBioseqHandle(gps.GetSeq_set().front()->GetSeq());
258  SAnnotSelector sel;
 // Search features by their product rather than by their location.
259  sel.SetByProduct(true);
260  CFeat_CI cds(prot, sel);
261  while (cds) {
263  if (cds_seq == contig) {
264  return true;
265  }
266  ++cds;
267  }
268  }
269 
270  return false;
271 }
272 
273 
275 (const CBioseq_set& seqset,
276  int nuccnt,
277  int protcnt,
278  int segcnt)
279 {
 // Validates a nuc-prot set.
 //
 // seqset  - the set being validated
 // nuccnt  - number of nucleotide Bioseqs counted by the caller
 // protcnt - number of protein Bioseqs counted by the caller
 // segcnt  - number of segmented Bioseqs counted by the caller
 //
 // Checks, in order: the member counts; each direct member Bioseq (DBLink user
 // objects, GPS product packaging, protein title against the generated
 // defline); the class of nested sets; the number of proteins carrying a
 // BioSource; and the set-level descriptors (source, title, RefGeneTracking).
 //
 // NOTE(review): the heads of most PostErr calls, two descriptor loop headers
 // and the declaration of `gps` are missing from this listing.
280  if ( nuccnt == 0 ) {
282  "No nucleotides in nuc-prot set", seqset);
 // More than one nucleotide is accepted only with exactly one segmented Bioseq.
283  } else if ( nuccnt > 1 && segcnt != 1) {
285  "Multiple unsegmented nucleotides in nuc-prot set", seqset);
286  }
287  if ( protcnt == 0 ) {
289  "No proteins in nuc-prot set", seqset);
290  }
291 
292  int prot_biosource = 0;
293  bool is_nm = false;
294 
295  sequence::CDeflineGenerator defline_generator;
296 
297  FOR_EACH_SEQENTRY_ON_SEQSET (se_list_it, seqset) {
298  if ( (*se_list_it)->IsSeq() ) {
299  const CBioseq& seq = (*se_list_it)->GetSeq();
300 
301 
 // The loop header over the descriptors of `seq` (listing line 302) is
 // missing; the body below reports a "DBLink" user object (case-sensitive).
303  const CSeqdesc& desc = **it;
304  if (desc.Which() != CSeqdesc::e_User) continue;
305  if (desc.GetUser().IsSetType()) {
306  const CUser_object& usr = desc.GetUser();
307  const CObject_id& oi = usr.GetType();
308  if (oi.IsStr() && NStr::EqualCase(oi.GetStr(), "DBLink")) {
309  PostErr(eDiag_Critical, eErr_SEQ_DESCR_DBLinkProblem, "DBLink user object should not be on a Bioseq", seq);
310  }
311  }
312  }
313 
 // NOTE(review): `gps` is declared on a missing line. It is tested as a
 // boolean and offers GetCompleteBioseq_set(), so it is presumably the
 // CBioseq_set_Handle of an enclosing gen-prod set - confirm.
316  if (seq.IsNa()) {
317  if (gps && !IsMrnaProductInGPS(seq) ) {
318  PostErr(eDiag_Warning,
320  "Nucleotide bioseq should be product of mRNA "
321  "feature on contig, but is not",
322  seq);
323  }
 // Remember whether any nucleotide has an "other"-type id whose accession
 // starts with "NM_"; used for the RefGeneTracking check at the end.
324  FOR_EACH_SEQID_ON_BIOSEQ (id_it, seq) {
325  if ((*id_it)->IsOther() && (*id_it)->GetOther().IsSetAccession()) {
326  const string& acc = (*id_it)->GetOther().GetAccession();
327  if (NStr::StartsWith(acc, "NM_")) {
328  is_nm = true;
329  }
330  }
331  }
332  } else if ( seq.IsAa() ) {
333  if (gps && !IsCDSProductInGPS(seq, *(gps.GetCompleteBioseq_set())) ) {
334  PostErr(eDiag_Warning,
336  "Protein bioseq should be product of CDS "
337  "feature on contig, but is not",
338  seq);
339  }
340  string instantiated = "";
 // The descriptor loop header (listing line 341) is missing. The body counts
 // BioSource descriptors and keeps the last title seen.
342  if ((*it)->IsSource()) {
343  prot_biosource++;
344  }
345  if ((*it)->IsTitle()) {
346  instantiated = (*it)->GetTitle();
347  }
348  }
349  // look for instantiated protein titles that don't match
350 
351  if (!NStr::IsBlank(instantiated)) {
352  string generated = defline_generator.GenerateDefline(seq, *m_Scope, sequence::CDeflineGenerator::fIgnoreExisting);
 // Only on a mismatch is the defline regenerated with fAllProteinNames and
 // compared again after normalization.
353  if (!NStr::EqualNocase(instantiated, generated)) {
354  generated = defline_generator.GenerateDefline(seq, *m_Scope,
355  sequence::CDeflineGenerator::fIgnoreExisting | sequence::CDeflineGenerator::fAllProteinNames);
 // Strip at most one known prefix from each title; the erase lengths
 // (11, 12, 21) are the character counts of the matched prefix literals.
356  if (NStr::StartsWith (instantiated, "PREDICTED: ", NStr::eNocase)) {
357  instantiated.erase (0, 11);
358  } else if (NStr::StartsWith (instantiated, "UNVERIFIED: ", NStr::eNocase)) {
359  instantiated.erase (0, 12);
360  } else if (NStr::StartsWith (instantiated, "PUTATIVE PSEUDOGENE: ", NStr::eNocase)) {
361  instantiated.erase (0, 21);
362  }
363  if (NStr::StartsWith (generated, "PREDICTED: ", NStr::eNocase)) {
364  generated.erase (0, 11);
365  } else if (NStr::StartsWith (generated, "UNVERIFIED: ", NStr::eNocase)) {
366  generated.erase (0, 12);
367  } else if (NStr::StartsWith (generated, "PUTATIVE PSEUDOGENE: ", NStr::eNocase)) {
368  generated.erase (0, 21);
369  }
370  //okay if instantiated title has single trailing period
371  if (instantiated.length() == generated.length() + 1 && NStr::EndsWith(instantiated, ".")
372  && !NStr::EndsWith(instantiated, "..")) {
373  generated += ".";
374  }
375  if (!NStr::EqualNocase(instantiated, generated)) {
377  "Instantiated protein title does not match automatically "
378  "generated title", seq);
379  }
380  }
381  }
382  }
383  }
384 
385  if ( !(*se_list_it)->IsSet() )
386  continue;
387 
 // A nested set inside a nuc-prot set must be a segset.
388  const CBioseq_set& set = (*se_list_it)->GetSet();
389  if ( set.GetClass() != CBioseq_set::eClass_segset ) {
390 
391  const CEnumeratedTypeValues* tv =
392  CBioseq_set::GetTypeInfo_enum_EClass();
393  const string& set_class = tv->FindName(set.GetClass(), true);
394 
396  "Nuc-prot Bioseq-set contains wrong Bioseq-set, "
397  "its class is \"" + set_class + "\".", set);
 // Leaves the member loop after the first wrong nested set, so members after
 // it are not examined by this loop.
398  break;
399  }
400  }
 // Any protein with its own BioSource is reported; the wording depends on
 // whether there is one or several.
401  if (prot_biosource > 1) {
403  "Nuc-prot set has " + NStr::IntToString (prot_biosource)
404  + " proteins with a BioSource descriptor", seqset);
405  } else if (prot_biosource > 0) {
407  "Nuc-prot set has 1 protein with a BioSource descriptor", seqset);
408  }
409 
410  bool has_source = false;
411  bool has_title = false;
412  bool has_refgenetracking = false;
 // Scan the set-level descriptors. A source only counts when it carries a
 // non-blank taxname.
413  FOR_EACH_DESCRIPTOR_ON_SEQSET (it, seqset) {
414  if ((*it)->IsSource()
415  && (*it)->GetSource().IsSetOrg()
416  && (*it)->GetSource().GetOrg().IsSetTaxname()
417  && !NStr::IsBlank ((*it)->GetSource().GetOrg().GetTaxname())) {
418  has_source = true;
419  } else if ((*it)->IsTitle()) {
420  has_title = true;
421  } else if ((*it)->IsUser()
422  && IsRefGeneTrackingObject((*it)->GetUser())) {
423  has_refgenetracking = true;
424  }
425  /*
426  if (has_title && has_source) {
427  break;
428  }
429  */
430  }
431 
432  if (!has_source) {
433  // error if does not have source and is not genprodset
 // NOTE(review): the `gps` tested here is declared on a missing line
 // (listing line 434), separately from the one used inside the member loop.
435  if (!gps) {
437  "Nuc-prot set does not contain expected BioSource descriptor", seqset);
438  }
439  }
440 
441  if (has_title) {
443  "Nuc-prot set should not have title descriptor", seqset);
444  }
445 
 // RefGeneTracking on the set is tolerated only when an NM_ accession was seen.
446  if (has_refgenetracking && (! is_nm)) {
448  "Nuc-prot set should not have RefGeneTracking user object", seqset);
449  }
450 }
451 
452 
454 {
 // Reports the first pair of MolInfo objects in `seqset` whose biomol values
 // differ. Does nothing when the set class is not set.
 //
 // NOTE(review): the declaration of the iterator `miit` (listing line 459),
 // the PostErr heads and part of the class condition are missing from this
 // listing.
455  if (!seqset.IsSetClass()) {
456  return;
457  }
458 
460  const CMolInfo* mol_info = 0;
461 
462  for (; miit; ++miit) {
 // MolInfo without a biomol, or with the peptide biomol, is ignored.
463  if (!miit->IsSetBiomol() || miit->GetBiomol() == CMolInfo::eBiomol_peptide) {
464  continue;
465  }
 // The first remaining MolInfo becomes the reference for all later ones.
466  if (mol_info == 0) {
467  mol_info = &(*miit);
468  } else if (mol_info->GetBiomol() != miit->GetBiomol() ) {
 // The message depends on the set class; for classes matching neither
 // branch nothing is reported.
469  if (seqset.GetClass() == CBioseq_set::eClass_segset) {
471  "Segmented set contains inconsistent MolInfo biomols",
472  seqset);
473  } else if (seqset.GetClass() == CBioseq_set::eClass_pop_set
480  "Pop/phy/mut/eco set contains inconsistent MolInfo biomols",
481  seqset);
482  }
 // Stop at the first mismatch, whether or not it was reported.
483  break;
484  }
485  } // for
486 
487 }
488 
489 
490 void CValidError_bioseqset::ValidateSegSet(const CBioseq_set& seqset, int segcnt)
491 {
 // Validates a segmented set.
 //
 // seqset - the set being validated
 // segcnt - number of segmented Bioseqs counted by the caller
 //
 // Reports a segset without any segmented Bioseq, a mixture of nucleotide and
 // protein members, and a nested set of the wrong class.
 //
 // NOTE(review): the PostErr heads, the declaration of `mol` (listing line
 // 497), the second half of the nested-set condition (listing line 519) and
 // the final statement (listing line 533) are missing from this listing.
492  if ( segcnt == 0 ) {
494  "No segmented Bioseq in segset", seqset);
495  }
496 
498  CSeq_inst::EMol seq_inst_mol;
499 
500  FOR_EACH_SEQENTRY_ON_SEQSET (se_list_it, seqset) {
501  if ( (*se_list_it)->IsSeq() ) {
502  const CSeq_inst& seq_inst = (*se_list_it)->GetSeq().GetInst();
503 
 // While `mol` is not_set or other, adopt this member's molecule type as the
 // reference for later members.
504  if ( mol == CSeq_inst::eMol_not_set ||
505  mol == CSeq_inst::eMol_other ) {
506  mol = seq_inst.GetMol();
 // Members whose own mol is "other" are never compared.
507  } else if ( (seq_inst_mol = seq_inst.GetMol()) != CSeq_inst::eMol_other) {
508  if ( seq_inst.IsNa() != CSeq_inst::IsNa(mol) ) {
510  "Segmented set contains mixture of nucleotides"
511  " and proteins", seqset);
 // One report is enough; this leaves the member loop.
512  break;
513  }
514  }
515  } else if ( (*se_list_it)->IsSet() ) {
516  const CBioseq_set& set = (*se_list_it)->GetSet();
517 
518  if ( set.IsSetClass() &&
520  const CEnumeratedTypeValues* tv =
521  CBioseq_set::GetTypeInfo_enum_EClass();
522  const string& set_class_str =
523  tv->FindName(set.GetClass(), true);
524 
526  "Segmented set contains wrong Bioseq-set, "
527  "its class is \"" + set_class_str + "\".", set);
528  break;
529  }
530  } // else if
531  } // iterate
532 
534 }
535 
536 
538 {
 // Validates a parts set: reports members that mix nucleotides and proteins,
 // and reports every nested Bioseq-set. Unlike the segset check there is no
 // `break`, so each offending member produces its own report.
 //
 // NOTE(review): the declaration of `mol` (listing line 539) and the PostErr
 // heads are missing from this listing.
540  CSeq_inst::EMol seq_inst_mol;
541 
542  FOR_EACH_SEQENTRY_ON_SEQSET (se_list_it, seqset) {
543  if ( (*se_list_it)->IsSeq() ) {
544  const CSeq_inst& seq_inst = (*se_list_it)->GetSeq().GetInst();
545 
 // While `mol` is not_set or other, adopt this member's molecule type as the
 // reference for later members.
546  if ( mol == CSeq_inst::eMol_not_set ||
547  mol == CSeq_inst::eMol_other ) {
548  mol = seq_inst.GetMol();
549  } else {
550  seq_inst_mol = seq_inst.GetMol();
 // Members whose own mol is "other" are never compared.
551  if ( seq_inst_mol != CSeq_inst::eMol_other) {
552  if ( seq_inst.IsNa() != CSeq_inst::IsNa(mol) ) {
554  "Parts set contains mixture of nucleotides "
555  "and proteins", seqset);
556  }
557  }
558  }
559  } else if ( (*se_list_it)->IsSet() ) {
 // Any nested set is reported, whatever its class; the class name is looked
 // up only for the message text.
560  const CBioseq_set& set = (*se_list_it)->GetSet();
561  const CEnumeratedTypeValues* tv =
562  CBioseq_set::GetTypeInfo_enum_EClass();
563  const string& set_class_str =
564  tv->FindName(set.GetClass(), true);
565 
567  "Parts set contains unwanted Bioseq-set, "
568  "its class is \"" + set_class_str + "\".", set);
569  } // else if
570  } // for
571 }
572 
573 
575 {
 // NOTE(review): the signature line is missing from this listing. By position
 // (after the parts-set validator) this is presumably ValidateGenbankSet -
 // confirm. The body is empty, so no checks are performed here.
576 }
577 
578 
580 {
 // Checks that a title descriptor is on the set exactly when
 // seqset.NeedsDocsumTitle() says one is wanted.
 //
 // NOTE(review): the heads of both PostErr calls are missing from this listing.
581  bool has_title = false;
582  FOR_EACH_DESCRIPTOR_ON_SEQSET(it, seqset) {
583  if ((*it)->IsTitle()) {
584  has_title = true;
585  break;
586  }
587  }
588 
589  if (seqset.NeedsDocsumTitle()) {
 // A missing title is reported only when m_Imp flags the record as RefSeq,
 // EMBL, DDBJ or GenBank.
590  if (!has_title && (m_Imp.IsRefSeq() || m_Imp.IsEmbl() || m_Imp.IsDdbj() || m_Imp.IsGenbank())) {
592  "Pop/Phy/Mut/Eco set does not have title",
593  seqset);
594  }
 // A title on a set that does not need one is always reported.
595  } else if (has_title) {
597  "Only Pop/Phy/Mut/Eco sets should have titles",
598  seqset);
599  }
600 }
601 
602 
604 {
 // Component checks for eco (and, per the messages, pop/phy/mut/wgs) sets.
 // Does nothing when the set class is not set.
 //
 // First part: the set must have components, and a single component is
 // accepted only when an alignment annotation exists. Second part (indexer
 // version only): every nucleotide in the set is expected to have a title.
 //
 // NOTE(review): both class conditions are cut after eClass_eco_set (listing
 // lines 609-611 and 635-637 are missing), as are the PostErr heads.
605  if (!seqset.IsSetClass()) {
606  return;
607  }
608  if (seqset.GetClass() == CBioseq_set::eClass_eco_set ||
612 
613  if (!seqset.IsSetSeq_set() || seqset.GetSeq_set().size() == 0) {
615  "Pop/Phy/Mut/Eco set has no components",
616  seqset);
617  } else if (seqset.GetSeq_set().size() == 1) {
 // One component: look for any alignment annot reachable through the annot
 // iterator on the set handle.
618  bool has_alignment = false;
619  CSeq_annot_CI annot_it (m_Scope->GetBioseq_setHandle(seqset));
620  while (annot_it && !has_alignment) {
621  if (annot_it->IsAlign()) {
622  has_alignment = true;
623  }
624  ++annot_it;
625  }
626  if (!has_alignment) {
628  "Pop/Phy/Mut/Eco set has only one component and no alignments",
629  seqset);
630  }
631  }
632  }
633  if (m_Imp.IsIndexerVersion()) {
634  if (seqset.GetClass() == CBioseq_set::eClass_eco_set ||
638  CBioseq_CI b_i(m_Scope->GetBioseq_setHandle(seqset));
639  while (b_i) {
 // Only nucleotide Bioseqs are checked for a title descriptor.
640  if (b_i->IsNa()) {
641  const CBioseq& seq = *(b_i->GetCompleteBioseq());
642  bool has_title = false;
643  FOR_EACH_DESCRIPTOR_ON_BIOSEQ (d_i, seq) {
644  if ((*d_i)->IsTitle()) {
645  has_title = true;
646  break;
647  }
648  }
 // Same source-database gate as the set-title check.
649  if (!has_title && (m_Imp.IsRefSeq() || m_Imp.IsEmbl() || m_Imp.IsDdbj() || m_Imp.IsGenbank())) {
651  "Nucleotide component of pop/phy/mut/eco/wgs set is missing its title",
652  seq);
653  }
654  }
655  ++b_i;
656  }
657  }
658  }
659 }
660 
661 
663 {
 // Reports a MolInfo descriptor placed directly on a set of one of the listed
 // classes. The switch only picks the label used in the message; classes that
 // reach `default` are not checked at all.
 //
 // NOTE(review): every `case` label and the PostErr head are missing from
 // this listing.
664  string class_name = "";
665 
666  switch (seqset.GetClass()) {
668  class_name = "Pop set";
669  break;
671  class_name = "Mut set";
672  break;
674  class_name = "Genbank set";
675  break;
679  class_name = "Phy/eco/wgs set";
680  break;
682  class_name = "GenProd set";
683  break;
685  class_name = "Small genome set";
686  break;
687  default:
688  return;
689  break;
690  }
691 
692  FOR_EACH_DESCRIPTOR_ON_SEQSET (it, seqset) {
693  if ((*it)->IsMolinfo()) {
695  class_name + " has MolInfo on set", seqset);
 // Only the first MolInfo descriptor is reported.
696  return;
697  }
698  }
699 }
700 
701 
703 {
 // Validates a population set: a RefSeq record must not be a pop-set, and all
 // Bioseqs visited by `seqit` are expected to share one organism taxname
 // (compared case-insensitively against the first Bioseq's taxname).
 //
 // NOTE(review): the PostErr heads, the declaration of `seqit` (listing line
 // 711), the lookups of `d` and `f` (listing lines 719 and 725) and the last
 // statement (listing line 764) are missing from this listing.
704  static const string sp = " sp. ";
705 
706  if (m_Imp.IsRefSeq()) {
708  "RefSeq record should not be a Pop-set", seqset);
709  }
710 
712  string first_taxname = "";
713  bool is_first = true;
714  for (; seqit; ++seqit) {
715  string taxname = "";
716  CBioseq_Handle bsh = m_Scope->GetBioseqHandle (*seqit);
717  // Will get the first biosource either from the descriptor
718  // or feature.
 // When neither yields a taxname, `taxname` stays empty and is compared as such.
720  if (d) {
721  if (d->GetSource().IsSetOrg() && d->GetSource().GetOrg().IsSetTaxname()) {
722  taxname = d->GetSource().GetOrg().GetTaxname();
723  }
724  } else {
726  if (f && f->GetData().GetBiosrc().IsSetOrg() && f->GetData().GetBiosrc().GetOrg().IsSetTaxname()) {
727  taxname = f->GetData().GetBiosrc().GetOrg().GetTaxname();
728  }
729  }
730 
 // The first Bioseq only supplies the reference taxname.
731  if (is_first) {
732  first_taxname = taxname;
733  is_first = false;
734  continue;
735  }
736 
737  // Make sure all the taxnames in the set are the same.
738  if ( NStr::CompareNocase(first_taxname, taxname) == 0 ) {
739  continue;
740  }
741 
742  // drops severity if first mismatch is same up to sp.
743  EDiagSev sev = eDiag_Error;
744  SIZE_TYPE pos = NStr::Find(taxname, sp);
745  if ( pos != NPOS ) {
 // Compare everything up to and including the " sp. " marker.
746  SIZE_TYPE len = pos + sp.length();
747  if ( NStr::strncasecmp(first_taxname.c_str(),
748  taxname.c_str(),
749  len) == 0 ) {
750  sev = eDiag_Warning;
751  }
752  }
753  // drops severity if one name is subset of the other
 // NOTE(review): this compares the first comp_len characters of `taxname`
 // with the whole of `first_taxname` (case-sensitively). That appears to
 // match only when first_taxname is a prefix of taxname, not the reverse
 // case - confirm that this is intended.
754  SIZE_TYPE comp_len = min (taxname.length(), first_taxname.length());
755  if (NStr::EqualCase(taxname, 0, comp_len, first_taxname)) {
756  sev = eDiag_Warning;
757  }
758 
760  "Population set contains inconsistent organisms.",
761  seqset);
 // Only the first mismatch is reported.
762  break;
763  }
765 }
766 
767 
769 {
 // NOTE(review): both the signature line and the single body statement
 // (listing line 770) are missing from this listing. By position this is
 // presumably ValidatePhyMutEcoWgsSet - confirm against the real source.
771 }
772 
773 
775  const CBioseq_set& seqset)
776 {
 // Validates a genomic product set: annotations must not sit directly on the
 // set, and the product of each feature visited by `fi` on the first member
 // Bioseq should be packaged in the same set (a far RefSeq product is
 // accepted).
 //
 // NOTE(review): the PostErr heads and the declarations of `id_type`, `bsh`,
 // `fi` and `cdna` are missing from this listing (listing lines 778, 795,
 // 797, 799, 801).
777  bool id_no_good = false;
779 
780  // genprodset should not have annotations directly on set
781  if (seqset.IsSetAnnot()) {
784  "Seq-annot packaged directly on genomic product set", seqset);
785  }
786 
 // NOTE(review): begin() is dereferenced below with no visible check that
 // the member list is non-empty - confirm that callers guarantee at least
 // one member.
787  CBioseq_set::TSeq_set::const_iterator se_list_it =
788  seqset.GetSeq_set().begin();
789 
 // Only a Bioseq as first member is examined; otherwise nothing more is done.
790  if ( !(**se_list_it).IsSeq() ) {
791  return;
792  }
793 
794  const CBioseq& seq = (*se_list_it)->GetSeq();
796 
798  for (; fi; ++fi) {
800  if ( fi->IsSetProduct() ) {
802  m_Scope, fi->GetProduct(), bsh.GetTSE_Handle());
 // Product not found in this TSE: decide from the product id type whether
 // that is acceptable.
803  if ( !cdna ) {
804  try {
805  const CSeq_id& id = GetId(fi->GetProduct(), m_Scope);
806  id_type = id.Which();
 // NOTE(review): id_no_good is never reset in the visible lines, so one
 // failed lookup also flags every later feature - confirm this is intended.
807  } catch (CException ) {
808  id_no_good = true;
809  } catch (std::exception ) {
810  id_no_good = true;
811  }
812 
813  // okay to have far RefSeq product
814  if ( id_no_good || (id_type != CSeq_id::e_Other) ) {
815  string loc_label;
816  fi->GetProduct().GetLabel(&loc_label);
817 
 // Fall back to "?" so the message always names something.
818  if (loc_label.empty()) {
819  loc_label = "?";
820  }
821 
824  "Product of mRNA feature (" + loc_label +
825  ") not packaged in genomic product set", seq);
826 
827  }
828  } // if (cdna == 0)
 // No product at all: reported unless the feature is pseudo.
829  } else if (!sequence::IsPseudo(*(fi->GetSeq_feat()), *m_Scope)) {
832  "Product of mRNA feature (?) not packaged in "
833  "genomic product set", seq);
834  }
835  }
836  } // for
837 }
838 
839 
841 {
 // Walks the nested sets of `seqset` recursively and reports every nested set
 // that has no class, or whose class is not nuc-prot, segset or parts.
 //
 // NOTE(review): the PostErr head (listing lines 848-849) is missing from
 // this listing.
842  FOR_EACH_SEQENTRY_ON_SEQSET (it, seqset) {
843  if ((*it)->IsSet()) {
844  if (!(*it)->GetSet().IsSetClass()
845  || ((*it)->GetSet().GetClass() != CBioseq_set::eClass_nuc_prot
846  && (*it)->GetSet().GetClass() != CBioseq_set::eClass_segset
847  && (*it)->GetSet().GetClass() != CBioseq_set::eClass_parts)) {
 // The report is attached to the nested set, not to seqset.
850  "Nested sets within Pop/Phy/Mut/Eco/Wgs set", (*it)->GetSet());
851  }
 // Recurse into every nested set, whether or not it was reported.
852  CheckForImproperlyNestedSets((*it)->GetSet());
853  }
854  }
855 }
856 
858 {
 // Reports every user-object descriptor on `seqset` whose type is the string
 // "DBLink" (compared case-insensitively). There is no early exit, so each
 // matching descriptor produces its own report.
 //
 // NOTE(review): the PostErr head (listing lines 867-868) is missing from
 // this listing.
859  FOR_EACH_DESCRIPTOR_ON_SEQSET (it, seqset) {
860  const CSeqdesc& desc = **it;
 // Skip anything that is not a user object with a string type of "DBLink".
861  if (! SEQDESC_CHOICE_IS (desc, NCBI_SEQDESC(User))) continue;
862  const CUser_object& usr = desc.GetUser();
863  if (! usr.IsSetType()) continue;
864  const CObject_id& oi = usr.GetType();
865  if (! oi.IsStr()) continue;
866  if (! NStr::EqualNocase(oi.GetStr(), "DBLink")) continue;
869  "DBLink user object should not be on this set", seqset);
870  }
871 }
872 
873 
CValidError_bioseq m_BioseqValidator
CBioseq_Handle –.
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
Definition: enumerated.cpp:132
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary Checks to...
Definition: sequence.cpp:1328
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:485
CS_CONTEXT * ctx
Definition: t0006.c:12
void ValidateGenProdSet(const CBioseq_set &seqset)
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
Definition: seq_macros.hpp:308
#define FOR_EACH_DESCRIPTOR_ON_BIOSEQ
Definition: seq_macros.hpp:241
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:102
bool IsIndexerVersion(void) const
Definition: validatorp.hpp:614
const CSeq_loc & GetProduct(void) const
bool IsEmbl(void) const
Definition: validatorp.hpp:635
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:73
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
Definition: Org_ref_.hpp:346
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:644
bool IsDdbj(void) const
Definition: validatorp.hpp:636
#define fi
bool IsAa(void) const
Definition: Bioseq.cpp:350
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:740
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
virtual ~CValidError_bioseqset(void)
Case insensitive compare.
Definition: ncbistr.hpp:1177
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
CSeq_annot_CI –.
CConstRef< CBioseq_set > GetCompleteBioseq_set(void) const
Return the complete bioseq-set object.
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:597
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
CCacheImpl & GetCache(void)
ecological sample study
static bool IsNa(EMol mol)
Definition: Seq_inst.hpp:90
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
CValidError_annot m_AnnotValidator
CBioseq_set_Handle GetBioseq_setHandle(const CBioseq_set &seqset, EMissing action=eMissing_Default)
Definition: scope.cpp:164
Warning message.
Definition: ncbidiag.hpp:646
SAnnotSelector –.
bool IsRefSeq(void) const
Definition: validatorp.hpp:634
void ValidatePhyMutEcoWgsSet(const CBioseq_set &seqset)
CBioseq_Handle GetBioseqHandleFromLocation(CScope *scope, const CSeq_loc &loc, const CTSE_Handle &tse)
void ValidateNucProtSet(const CBioseq_set &seqset, int nuccnt, int protcnt, int segcnt)
#define NPOS
Definition: ncbistr.hpp:130
const TType & GetType(void) const
Get the Type member data.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsSetProduct(void) const
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
segmented sequence + parts
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5186
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seqdesc_.hpp:903
const CSeq_loc & GetLocation(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
bool IsGenbank(void) const
Definition: validatorp.hpp:656
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2786
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
CBioseq_set_Handle –.
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
Informational message.
Definition: ncbidiag.hpp:645
bool IsMrnaProductInGPS(const CBioseq &seq)
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:550
CValidError_descr m_DescrValidator
CFeat_CI –.
Definition: feat_ci.hpp:63
whole genome shotgun project
Error message.
Definition: ncbidiag.hpp:647
TBiomol GetBiomol(void) const
Get the Biomol member data.
Definition: MolInfo_.hpp:434
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:358
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
bool IsCDSProductInGPS(const CBioseq &seq, const CBioseq_set &gps)
void ValidateBioseq(const CBioseq &seq)
segmented sequence
Definition: Seq_inst_.hpp:95
static bool EqualCase(const CTempString str, SIZE_TYPE pos, SIZE_TYPE n, const char *pattern)
Case-sensitive equality of a substring with a pattern.
Definition: ncbistr.hpp:5439
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5519
user defined object
Definition: Seqdesc_.hpp:124
SAnnotSelector & SetByProduct(bool byProduct=true)
Set flag indicating if the features should be searched by their product rather than location...
void ValidatePartsSet(const CBioseq_set &seqset)
CValidError_imp & m_Imp
Definition: validatorp.hpp:995
bool IsGPS(void) const
Definition: validatorp.hpp:630
CSeq_entry_Handle –.
bool IsNa(void) const
Definition: Bioseq.cpp:345
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
#define NCBI_SEQDESC(Type)
CSeqdesc definitions.
Definition: seq_macros.hpp:97
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
bool IsRefGeneTrackingObject(const CUser_object &user)
Definition: utilities.cpp:865
void ValidateSeqAnnot(const CSeq_annot_Handle &annot)
#define FOR_EACH_DESCRIPTOR_ON_SEQSET
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
bool NeedsDocsumTitle() const
Definition: Bioseq_set.cpp:343
const TDescr & GetDescr(void) const
Get the Descr member data.
T min(T x_, T y_)
void ValidateSetElements(const CBioseq_set &seqset)
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
void ShouldHaveNoDblink(const CBioseq_set &seqset)
CException –.
Definition: ncbiexpt.hpp:709
void ValidateSeqDescr(const CSeq_descr &descr, const CSeq_entry &ctx)
void ValidatePopSet(const CBioseq_set &seqset)
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found...
Definition: Seq_entry.hpp:55
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TClass GetClass(void) const
Get the Class member data.
static CRef< CScope > m_Scope
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
int len
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
static int strncasecmp(const char *s1, const char *s2, size_t n)
Case-insensitive comparison of two zero-terminated strings, narrowed to the specified number of chara...
Definition: ncbistr.hpp:5349
void ValidateSegSet(const CBioseq_set &seqset, int segcnt)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
constructed sequence + parts
Critical error message.
Definition: ncbidiag.hpp:648
#define FOR_EACH_SEQANNOT_ON_SEQSET(Itr, Var)
FOR_EACH_SEQANNOT_ON_SEQSET EDIT_EACH_SEQANNOT_ON_SEQSET.
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:129
void ValidateSeqAnnotContext(const CSeq_annot &annot, const CBioseq &seq)
bool IsNa(void) const
Definition: Seq_inst.hpp:106
void CheckForInconsistentBiomols(const CBioseq_set &seqset)
const CSeqFeatData & GetData(void) const
#define SEQDESC_CHOICE_IS(Var, Chs)
SEQDESC_CHOICE_IS.
Definition: seq_macros.hpp:696
static bool EqualNocase(const CTempString str, SIZE_TYPE pos, SIZE_TYPE n, const char *pattern)
Case-insensitive equality of a substring with a pattern.
Definition: ncbistr.hpp:5465
No variant selected.
Definition: Seq_id_.hpp:94
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:70
CBioseq_set_Handle GetGenProdSetParent(CBioseq_set_Handle set)
Definition: utilities.cpp:549
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:326
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5535
CBioseq_CI –.
Definition: bioseq_ci.hpp:68
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
Definition: MolInfo_.hpp:409
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
viral segments or mitochondrial minicircles
void CheckForImproperlyNestedSets(const CBioseq_set &seqset)
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
void ValidateGenbankSet(const CBioseq_set &seqset)
const TUser & GetUser(void) const
Get the variant data.
Definition: Seqdesc_.cpp:384
static int CompareNocase(const CTempString str, SIZE_TYPE pos, SIZE_TYPE n, const char *pattern)
Case-insensitive compare of a substring with a pattern.
Definition: ncbistr.cpp:170
Definition: set.hpp:44
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
void ValidateSetTitle(const CBioseq_set &seqset)
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:64
genomic products, chrom+mRNA+protein
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:497
void SetShouldNotHaveMolInfo(const CBioseq_set &seqset)
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
void ValidateBioseqSet(const CBioseq_set &seqset)
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:966
ESubtype GetSubtype(void) const
Modified on Mon Mar 27 16:08:04 2017 by modify_doxy.py rev. 506947