NCBI C++ ToolKit
cleanup.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cleanup.cpp 73335 2016-06-30 15:33:49Z bollin $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Robert Smith
27  *
28  * File Description:
29  * Basic Cleanup of CSeq_entries.
30  *
31  */
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 #include <serial/serialbase.hpp>
35 #include <objects/seq/Bioseq.hpp>
37 // included for GetPubdescLabels and GetCitationList
38 #include <objects/pub/Pub.hpp>
40 #include <objects/seq/Pubdesc.hpp>
46 
57 
59 #include <objmgr/util/sequence.hpp>
60 #include <objmgr/util/feature.hpp>
61 #include <objmgr/seq_annot_ci.hpp>
62 #include <objmgr/seqdesc_ci.hpp>
63 #include <objmgr/seq_vector.hpp>
64 #include <objmgr/seq_vector_ci.hpp>
66 #include "cleanup_utils.hpp"
68 
69 #include "newcleanupp.hpp"
70 
73 
76 };
77 
78 // *********************** CCleanup implementation **********************
79 
80 
82 {
84  if (scope) {
85  m_Scope->AddScope(*scope);
86  }
87 }
88 
89 
91 {
92 }
93 
94 
96 {
98  if (scope) {
99  m_Scope->AddScope(*scope);
100  }
101 }
102 
103 
104 static
106 {
107  CRef<CCleanupChange> changes;
108  if (! (options & CCleanup::eClean_NoReporting)) {
109  changes.Reset(new CCleanupChange);
110  }
111  return changes;
112 }
113 
// CLEANUP_SETUP: shared local setup for cleanup entry points.
// Expands to three statements that
//   - declare `changes`, the CRef<CCleanupChange> produced by
//     makeCleanupChange(options),
//   - declare `clean_i`, a CNewCleanup_imp built from `changes` and `options`,
//   - point `clean_i` at *m_Scope.
// The expansion site must therefore have `options` and `m_Scope` in scope and
// must not already declare `changes` or `clean_i`.
// NOTE(review): presumably invoked at the top of the BasicCleanup /
// ExtendedCleanup overloads that use `clean_i` without declaring it -- the
// invoking lines are not visible in this listing; confirm.
114 #define CLEANUP_SETUP \
115  CRef<CCleanupChange> changes(makeCleanupChange(options)); \
116  CNewCleanup_imp clean_i(changes, options); \
117  clean_i.SetScope(*m_Scope);
118 
120 {
122  clean_i.BasicCleanupSeqEntry(se);
123  return changes;
124 }
125 
126 
128 {
130  clean_i.BasicCleanupSeqSubmit(ss);
131  return changes;
132 }
133 
134 
135 /// Cleanup a Bioseq.
137 {
139  clean_i.BasicCleanupBioseq(bs);
140  return changes;
141 }
142 
143 
145 {
147  clean_i.BasicCleanupBioseqSet(bss);
148  return changes;
149 }
150 
151 
153 {
155  clean_i.BasicCleanupSeqAnnot(sa);
156  return changes;
157 }
158 
159 
161 {
163  clean_i.BasicCleanupSeqFeat(sf);
164  return changes;
165 }
166 
167 
169 {
171  clean_i.BasicCleanupBioSource(src);
172  return changes;
173 }
174 
175 
177 {
178  CRef<CCleanupChange> changes(makeCleanupChange(options));
179  CNewCleanup_imp clean_i(changes, options);
180  clean_i.SetScope(seh.GetScope());
181  clean_i.BasicCleanupSeqEntryHandle(seh);
182  return changes;
183 }
184 
185 
187 {
188  CRef<CCleanupChange> changes(makeCleanupChange(options));
189  CNewCleanup_imp clean_i(changes, options);
190  clean_i.SetScope(bsh.GetScope());
191  clean_i.BasicCleanupBioseqHandle(bsh);
192  return changes;
193 }
194 
195 
197 {
198  CRef<CCleanupChange> changes(makeCleanupChange(options));
199  CNewCleanup_imp clean_i(changes, options);
200  clean_i.SetScope(bssh.GetScope());
201  clean_i.BasicCleanupBioseqSetHandle(bssh);
202  return changes;
203 }
204 
205 
207 {
208  CRef<CCleanupChange> changes(makeCleanupChange(options));
209  CNewCleanup_imp clean_i(changes, options);
210  clean_i.SetScope(sah.GetScope());
211  clean_i.BasicCleanupSeqAnnotHandle(sah);
212  return changes;
213 }
214 
215 
217 {
218  CRef<CCleanupChange> changes(makeCleanupChange(options));
219  CNewCleanup_imp clean_i(changes, options);
220  clean_i.SetScope(sfh.GetScope());
221  clean_i.BasicCleanupSeqFeatHandle(sfh);
222  return changes;
223 }
224 
225 
226 
227 
228 // *********************** Extended Cleanup implementation ********************
230 {
232  clean_i.ExtendedCleanupSeqEntry(se);
233 
234  return changes;
235 }
236 
237 
239 {
241  clean_i.ExtendedCleanupSeqSubmit(ss);
242  return changes;
243 }
244 
245 
247 {
249  clean_i.ExtendedCleanupSeqAnnot(sa); // (m_Scope->GetSeq_annotHandle(sa));
250  return changes;
251 }
252 
254 {
255  CRef<CCleanupChange> changes(makeCleanupChange(options));
256  CNewCleanup_imp clean_i(changes, options);
257  clean_i.SetScope(seh.GetScope());
258  clean_i.ExtendedCleanupSeqEntryHandle(seh); // (m_Scope->GetSeq_annotHandle(sa));
259  return changes;
260 }
261 
262 
263 // *********************** CCleanupChange implementation **********************
264 
265 
267 {
268 }
269 
270 
272 {
273  return m_Changes.count();
274 }
275 
276 
278 {
279  return m_Changes.test(e);
280 }
281 
282 
284 {
285  m_Changes.set(e);
286 }
287 
288 
289 vector<CCleanupChange::EChanges> CCleanupChange::GetAllChanges() const
290 {
291  vector<EChanges> result;
292  for (size_t i = eNoChange + 1; i < m_Changes.size(); ++i) {
293  if (m_Changes.test(i)) {
294  result.push_back( (EChanges) i);
295  }
296  }
297  return result;
298 }
299 
300 
302 {
303  vector<string> result;
304  for (size_t i = eNoChange + 1; i < m_Changes.size(); ++i) {
305  if (m_Changes.test(i)) {
306  result.push_back( GetDescription((EChanges) i) );
307  }
308  }
309  return result;
310 }
311 
312 
314 {
315  if (e <= eNoChange || e >= eNumberofChangeTypes) {
316  return sm_ChangeDesc[eNoChange];
317  }
318  return sm_ChangeDesc[e];
319 }
320 
321 // Human-readable descriptions, indexed by CCleanupChange::EChanges value.
322 // Entry order must match the enum exactly, so edit the two together.
323 const char* const CCleanupChange::sm_ChangeDesc[eNumberofChangeTypes + 1] = {
324  "Invalid Change Code",
325  // set when strings are changed.
326  "Trim Spaces",
327  "Clean Double Quotes",
328  "Append To String",
329  // set when lists are sorted or uniqued.
330  "Clean Qualifiers List",
331  "Clean Dbxrefs List",
332  "Clean CitonFeat List",
333  "Clean Keywords List",
334  "Clean Subsource List",
335  "Clean Orgmod List",
336  // Set when fields are moved or have content changes
337  "Repair BioseqMol", //10
338  "Change Feature Key",
339  "Normalize Authors",
340  "Change Publication",
341  "Change Qualifiers",
342  "Change Dbxrefs",
343  "Change Keywords",
344  "Change Subsource",
345  "Change Orgmod",
346  "Change Exception",
347  "Change Comment", //20
348  // Set when fields are rescued
349  "Change tRna",
350  "Change rRna",
351  "Change ITS",
352  "Change Anticodon",
353  "Change Code Break",
354  "Change Genetic Code",
355  "Copy GeneXref",
356  "Copy ProtXref",
357  // set when locations are repaired
358  "Change Seqloc",
359  "Change Strand", //30
360  "Change WholeLocation",
361  // set when MolInfo descriptors are affected
362  "Change MolInfo Descriptor",
363  // set when prot-xref is removed
364  "Remove ProtXref",
365  // set when gene-xref is removed
366  "Remove GeneXref",
367  // set when protein feature is added
368  "Add Protein Feature",
369  // set when feature is removed
370  "Remove Feature",
371  // set when feature is moved
372  "Move Feature",
373  // set when qualifier is removed
374  "Remove Qualifier",
375  // set when Gene Xref is created
376  "Add GeneXref",
377  // set when descriptor is removed
378  "Remove Descriptor", //40
379  "Remove Keyword",
380  "Add Descriptor",
381  "Move Descriptor",
382  "Convert Feature to Descriptor",
383  "Collapse Set",
384  "Change Feature Location",
385  "Remove Annotation",
386  "Convert Feature",
387  "Remove Comment",
388  "Add BioSource OrgMod", //50
389  "Add BioSource SubSource",
390  "Change BioSource Genome",
391  "Change BioSource Origin",
392  "Change BioSource Other",
393  "Change SeqId",
394  "Remove Empty Publication",
395  "Add Qualifier",
396  "Cleanup Date",
397  "Change BioseqInst",
398  "Remove SeqID", // 60
399  "Add ProtXref",
400  "Change Partial",
401  "Change Prot Names",
402  "Change Prot Activities",
403  "Change Site",
404  "Change PCR Primers",
405  "Change RNA-ref",
406  "Move To Prot Xref",
407  "Compress Spaces",
408  "Strip serial", // 70
409  "Remove Orgmod",
410  "Remove SubSource",
411  "Create Gene Nomenclature",
412  "Clean Seq-feat xref",
413  "Clean User-Object Or -Field",
414  "Letter Case Change",
415  "Change Bioseq-set Class",
416  "Unique Without Sort",
417  "Add RNA-ref",
418  "Change Gene-ref", // 80
419  "Clean Dbtag",
420  "Change Biomol",
421  "Change Cdregion",
422  "Clean EC Number",
423  "Remove Exception",
424  "Add NcbiCleanupObject",
425  "Clean Delta-ext",
426  "Trim Flanking Quotes",
427  "Clean Bioseq Title",
428  "Decode XML", // 90
429  "Remove Dup BioSource",
430  "Clean Org-ref",
431  "Trim Internal Semicolons",
432 
433  // set when any other change is made.
434  "Change Other",
435  "Invalid Change Code"
436 };
437 
438 
440 {
441  if (NStr::Equal(key, "sig_peptide")) {
443  } else if (NStr::Equal(key, "mat_peptide")) {
445  } else if (NStr::Equal(key, "transit_peptide")) {
447  } else if (NStr::Equal(key, "preprotein") || NStr::Equal(key, "proprotein")) {
449  } else {
451  }
452 }
453 
455 {
456  switch (processed) {
458  return "mat_peptide";
459  break;
461  return "preprotein";
462  break;
464  return "sig_peptide";
465  break;
467  return "transit_peptide";
468  break;
470  return kEmptyStr;
471  break;
472  }
473  return kEmptyStr;
474 }
475 
476 
478 {
479  if (fh.GetData().IsProt() && fh.GetData().GetProt().IsSetProcessed()) {
480  string key = s_KeyFromProcessed(fh.GetData().GetProt().GetProcessed());
481  if (!NStr::IsBlank(key)) {
482  CRef<CSeq_feat> new_feat(new CSeq_feat());
483  new_feat->Assign(*(fh.GetSeq_feat()));
484  if (fh.GetData().GetProt().IsSetName() && !fh.GetData().GetProt().GetName().empty()) {
485  CRef<CGb_qual> q(new CGb_qual());
486  q->SetQual("product");
487  q->SetVal(fh.GetData().GetProt().GetName().front());
488  new_feat->SetQual().push_back(q);
489  }
490  new_feat->SetData().SetImp().SetKey(key);
491  CSeq_feat_EditHandle efh(fh);
492  efh.Replace(*new_feat);
493  return true;
494  }
495  }
496  return false;
497 }
498 
499 
501 {
502  if (!fh.IsSetData()) {
503  return false;
504  } else if (fh.GetData().IsProt() &&
505  fh.GetData().GetProt().IsSetProcessed() &&
507  return true;
508  } else if (fh.GetData().IsImp() &&
509  fh.GetData().GetImp().IsSetKey() &&
511  return true;
512  } else {
513  return false;
514  }
515 }
516 
517 
519 {
520  if (!feat.IsSetQual() ||
521  !feat.IsSetData() ||
522  !feat.GetData().IsProt() ||
523  feat.GetData().GetProt().IsSetName()) {
524  return;
525  }
526  CSeq_feat::TQual::iterator it = feat.SetQual().begin();
527  while (it != feat.SetQual().end()) {
528  if ((*it)->IsSetQual() &&
529  NStr::Equal((*it)->GetQual(), "product")) {
530  if ((*it)->IsSetVal() && !NStr::IsBlank((*it)->GetVal())) {
531  feat.SetData().SetProt().SetName().push_back((*it)->GetVal());
532  }
533  it = feat.SetQual().erase(it);
534  break;
535  } else {
536  ++it;
537  }
538  }
539 
540  if (feat.SetQual().empty()) {
541  feat.ResetQual();
542  }
543 }
544 
545 
547 {
549  if (fh.GetData().IsImp()) {
550  if (!fh.GetData().GetImp().IsSetKey()) {
551  return false;
552  }
553  processed = s_ProcessedFromKey(fh.GetData().GetImp().GetKey());
554  if (processed == CProt_ref::eProcessed_not_set || processed == CProt_ref::eProcessed_preprotein) {
555  return false;
556  }
557  } else if (s_IsPreprotein(fh)) {
558  return ConvertProteinToImp(fh);
559  }
560 
561  CBioseq_Handle parent_bsh = fh.GetScope().GetBioseqHandle(fh.GetLocation());
562 
563  if (!parent_bsh) {
564  // feature is mispackaged
565  return false;
566  }
567  if (parent_bsh.IsAa()) {
568  // feature is already on protein sequence
569  return false;
570  }
571 
573  if (!cds || !cds->IsSetProduct()) {
574  // there is no overlapping coding region feature, so there is no appropriate
575  // protein sequence to move to
576  return ConvertProteinToImp(fh);
577  }
578 
579  CSeq_feat_Handle cds_h = fh.GetScope().GetSeq_featHandle(*cds);
580  if (!cds_h) {
581  // can't get handle
582  return false;
583  }
584 
585  CConstRef<CSeq_feat> orig_feat = fh.GetSeq_feat();
586  CRef<CSeq_feat> new_feat(new CSeq_feat());
587  new_feat->Assign(*orig_feat);
588  if (new_feat->GetData().Which() == CSeqFeatData::e_Imp) {
589  new_feat->SetData().SetProt().SetProcessed(processed);
590  // if possible, rescue product qual
591  RescueProtProductQual(*new_feat);
592  if (processed == CProt_ref::eProcessed_mature &&
593  !new_feat->GetData().GetProt().IsSetName()) {
594  if (orig_feat->IsSetComment() && !NStr::IsBlank(orig_feat->GetComment())) {
595  new_feat->SetData().SetProt().SetName().push_back(orig_feat->GetComment());
596  new_feat->ResetComment();
597  } else {
598  new_feat->SetData().SetProt().SetName().push_back("unnamed");
599  }
600  }
601  }
602 
603  CRef<CSeq_loc> new_loc;
604  CRef<CSeq_loc_Mapper> nuc2prot_mapper(
606  new_loc = nuc2prot_mapper->Map(orig_feat->GetLocation());
607  if (!new_loc) {
608  return false;
609  }
610  const CSeq_id* sid = new_loc->GetId();
611  const CSeq_id* orig_id = orig_feat->GetLocation().GetId();
612  if (!sid || (orig_id && sid->Equals(*orig_id))) {
613  // unable to map to protein location
614  return false;
615  }
617  if (new_loc->IsPartialStart(eExtreme_Biological)) {
618  new_loc->SetPartialStart(false, eExtreme_Biological);
619  }
620  }
622  if (new_loc->IsPartialStop(eExtreme_Biological)) {
623  new_loc->SetPartialStop(false, eExtreme_Biological);
624  }
625  }
626 
627  new_loc->ResetStrand();
628  // change location to protein
629  new_feat->ResetLocation();
630  new_feat->SetLocation(*new_loc);
631 
632 
633  CSeq_feat_EditHandle edh(fh);
634  edh.Replace(*new_feat);
636  CNewCleanup_imp clean_i(changes, 0);
637  clean_i.SetScope(fh.GetScope());
638  clean_i.BasicCleanupSeqFeat(*new_feat);
639 
640  CSeq_annot_Handle ah = fh.GetAnnot();
641 
642  CBioseq_Handle target_bsh = fh.GetScope().GetBioseqHandle(new_feat->GetLocation());
643  CBioseq_EditHandle eh = target_bsh.GetEditHandle();
644 
645  // Find a feature table on the protein sequence to add the feature to.
646  CSeq_annot_Handle ftable;
647  if (target_bsh.GetCompleteBioseq()->IsSetAnnot()) {
648  ITERATE(CBioseq::TAnnot, annot_it, target_bsh.GetCompleteBioseq()->GetAnnot()) {
649  if ((*annot_it)->IsFtable()) {
650  ftable = fh.GetScope().GetSeq_annotHandle(**annot_it);
651  }
652  }
653  }
654 
655  // If there is no feature table present, make one
656  if (!ftable) {
657  CRef<CSeq_annot> new_annot(new CSeq_annot());
658  ftable = eh.AttachAnnot(*new_annot);
659  }
660 
661  // add feature to the protein bioseq
662  CSeq_annot_EditHandle aeh(ftable);
663  aeh.TakeFeat(edh);
664 
665  // remove old annot if now empty
667  CSeq_annot_EditHandle orig(ah);
668  orig.Remove();
669  }
670 
671  return true;
672 }
673 
674 
676 {
677  bool any_change = false;
679  while (bi) {
683  for (CFeat_CI prot_it(*bi, sel); prot_it; ++prot_it) {
684  any_change |= MoveFeatToProtein(*prot_it);
685  }
686  for (CFeat_CI imp_it(*bi, CSeqFeatData::e_Imp); imp_it; ++imp_it) {
687  any_change |= MoveFeatToProtein(*imp_it);
688  }
689  ++bi;
690  }
691  return any_change;
692 }
693 
694 
// Decide whether a gene xref on feature `sf` is unnecessary.
//
// Returns false when
//   - the xref is a suppressing xref,
//   - `gene` is null, has no data, or is not a gene feature,
//   - `gene` does not refer to the same gene as `gene_xref`,
//   - the candidate list `scores` is empty, or
//   - some other candidate in `scores` has exactly the same location as `gene`.
// Returns true when `scores` holds exactly one candidate, or when no other
// candidate shares `gene`'s location.
//
// NOTE(review): the statement that declares `gene` (listing line 701) and the
// start of the call that fills `scores` (listing line 712) are missing from
// this listing; how they are obtained cannot be confirmed from here.
695 bool CCleanup::IsGeneXrefUnnecessary(const CSeq_feat& sf, CScope& scope, const CGene_ref& gene_xref)
696 {
 // a suppressing xref is never treated as unnecessary
697  if (gene_xref.IsSuppressed()) {
698  return false;
699  }
700 
702  if (!gene || !gene->IsSetData() || !gene->GetData().IsGene()) {
703  return false;
704  }
705 
 // the xref must name the same gene as `gene`
706  if (!gene->GetData().GetGene().RefersToSameGene(gene_xref)) {
707  return false;
708  }
709 
710  // see if other gene might also match
711  sequence::TFeatScores scores;
713  sequence::eOverlap_Contained, scores, scope);
714  if (scores.size() == 1) {
715  return true;
716  } else if (scores.size() == 0) {
717  return false;
718  }
719 
 // several candidates: a different feature at the same location as `gene`
 // means the xref cannot be dropped
720  ITERATE(sequence::TFeatScores, g, scores) {
721  if (g->second.GetPointer() != gene.GetPointer() &&
722  sequence::Compare(g->second->GetLocation(), gene->GetLocation(), &scope) == sequence::eSame) {
723  return false;
724  }
725  }
726  return true;
727 }
728 
729 
731 {
732  if (!f.IsSetXref()) {
733  return false;
734  }
735  bool any_removed = false;
736  CSeq_feat::TXref::iterator xit = f.SetXref().begin();
737  while (xit != f.SetXref().end()) {
738  if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() &&
739  IsGeneXrefUnnecessary(f, scope, (*xit)->GetData().GetGene())) {
740  xit = f.SetXref().erase(xit);
741  any_removed = true;
742  } else {
743  ++xit;
744  }
745  }
746  if (any_removed) {
747  if (f.IsSetXref() && f.GetXref().empty()) {
748  f.ResetXref();
749  }
750  }
751  return any_removed;
752 }
753 
754 
756 {
757  bool any_change = false;
758  CScope& scope = seh.GetScope();
759 
760  for (CFeat_CI fi(seh); fi; ++fi) {
761  if (fi->IsSetXref()) {
762  CRef<CSeq_feat> new_feat(new CSeq_feat());
763  new_feat->Assign(*(fi->GetOriginalSeq_feat()));
764  bool any_removed = RemoveUnnecessaryGeneXrefs(*new_feat, scope);
765  if (any_removed) {
766  CSeq_feat_EditHandle edh(*fi);
767  edh.Replace(*new_feat);
768  any_change = true;
769  }
770  }
771  }
772 
773  return any_change;
774 }
775 
777 {
778  if (!f.IsSetXref()) {
779  return false;
780  }
781  bool any_removed = false;
782  CSeq_feat::TXref::iterator xit = f.SetXref().begin();
783  while (xit != f.SetXref().end()) {
784  if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() &&
785  !(*xit)->GetData().GetGene().IsSuppressed()) {
786  xit = f.SetXref().erase(xit);
787  any_removed = true;
788  } else {
789  ++xit;
790  }
791  }
792  if (any_removed) {
793  if (f.IsSetXref() && f.GetXref().empty()) {
794  f.ResetXref();
795  }
796  }
797  return any_removed;
798 }
799 
801 {
802  bool match = false;
803  string locus1;
804  if (gene_xref.IsSetLocus())
805  locus1 = gene_xref.GetLocus();
806  for (CFeat_CI feat_ci(bsh, SAnnotSelector(CSeqFeatData::eSubtype_gene)); feat_ci; ++feat_ci)
807  {
808  string locus2;
809  if ( !f.Equals(*feat_ci->GetSeq_feat()) && feat_ci->GetSeq_feat()->IsSetData() && feat_ci->GetSeq_feat()->GetData().IsGene()
810  && feat_ci->GetSeq_feat()->GetData().GetGene().IsSetLocus())
811  {
812  locus2 = feat_ci->GetSeq_feat()->GetData().GetGene().GetLocus();
813  }
814  if (!locus1.empty() && !locus2.empty() && locus1 == locus2)
815  {
816  match = true;
817  break;
818  }
819  }
820  return match;
821 }
822 
824 {
825  if (!f.IsSetXref()) {
826  return false;
827  }
828  bool any_removed = false;
829  CSeq_feat::TXref::iterator xit = f.SetXref().begin();
830  while (xit != f.SetXref().end()) {
831  if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() &&
832  !(*xit)->GetData().GetGene().IsSuppressed() && !FindMatchingLocusGene(f, (*xit)->GetData().GetGene(), bsh)) {
833  xit = f.SetXref().erase(xit);
834  any_removed = true;
835  } else {
836  ++xit;
837  }
838  }
839  if (any_removed) {
840  if (f.IsSetXref() && f.GetXref().empty()) {
841  f.ResetXref();
842  }
843  }
844  return any_removed;
845 }
846 
848 {
849  bool match = false;
850  string locus_tag1;
851  if (gene_xref.IsSetLocus_tag())
852  locus_tag1 = gene_xref.GetLocus_tag();
853  for (CFeat_CI feat_ci(bsh, SAnnotSelector(CSeqFeatData::eSubtype_gene)); feat_ci; ++feat_ci)
854  {
855  string locus_tag2;
856  if ( !f.Equals(*feat_ci->GetSeq_feat()) && feat_ci->GetSeq_feat()->IsSetData() && feat_ci->GetSeq_feat()->GetData().IsGene()
857  && feat_ci->GetSeq_feat()->GetData().GetGene().IsSetLocus_tag())
858  {
859  locus_tag2 = feat_ci->GetSeq_feat()->GetData().GetGene().GetLocus_tag();
860  }
861  if (!locus_tag1.empty() && !locus_tag2.empty() && locus_tag1 == locus_tag2)
862  {
863  match = true;
864  break;
865  }
866  }
867  return match;
868 }
869 
871 {
872  if (!f.IsSetXref()) {
873  return false;
874  }
875  bool any_removed = false;
876  CSeq_feat::TXref::iterator xit = f.SetXref().begin();
877  while (xit != f.SetXref().end()) {
878  if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() &&
879  !(*xit)->GetData().GetGene().IsSuppressed() && !FindMatchingLocus_tagGene(f, (*xit)->GetData().GetGene(), bsh)) {
880  xit = f.SetXref().erase(xit);
881  any_removed = true;
882  } else {
883  ++xit;
884  }
885  }
886  if (any_removed) {
887  if (f.IsSetXref() && f.GetXref().empty()) {
888  f.ResetXref();
889  }
890  }
891  return any_removed;
892 }
893 
894 
// Extend `loc` so that it reaches position `pos`.
//
// If `pos` lies before the positional start of `loc`, an interval
// [pos, start - 1] is built on the same id and strand and inherits the
// original partial-start flag. If `pos` lies after the positional stop, an
// interval [stop + 1, pos] is built and inherits the original partial-stop
// flag. When either case applies, `loc` is overwritten from `new_loc` and the
// function returns true; otherwise `loc` is left alone and false is returned.
//
// NOTE(review): `new_loc` is initialised to NULL and no assignment to it is
// visible here before `loc.Assign(*new_loc)`. Listing lines 910 and 917 are
// missing from this listing; presumably they combine `add` with `loc` into
// `new_loc` -- confirm against the real source.
// NOTE(review): `loc.GetId()` is dereferenced without a null check -- confirm
// callers only pass locations for which it returns a non-null id.
895 bool CCleanup::SeqLocExtend(CSeq_loc& loc, size_t pos, CScope& scope)
896 {
 // snapshot the current extremes, partial flags and strand of loc
897  size_t loc_start = loc.GetStart(eExtreme_Positional);
898  size_t loc_stop = loc.GetStop(eExtreme_Positional);
899  bool partial_start = loc.IsPartialStart(eExtreme_Positional);
900  bool partial_stop = loc.IsPartialStop(eExtreme_Positional);
901  ENa_strand strand = loc.GetStrand();
902  CRef<CSeq_loc> new_loc(NULL);
903  bool changed = false;
904 
905  if (pos < loc_start) {
 // pos is left of the location: build the piece [pos, loc_start - 1]
906  CRef<CSeq_id> id(new CSeq_id());
907  id->Assign(*(loc.GetId()));
908  CRef<CSeq_loc> add(new CSeq_loc(*id, pos, loc_start - 1, strand));
909  add->SetPartialStart(partial_start, eExtreme_Positional);
911  changed = true;
912  } else if (pos > loc_stop) {
 // pos is right of the location: build the piece [loc_stop + 1, pos]
913  CRef<CSeq_id> id(new CSeq_id());
914  id->Assign(*(loc.GetId()));
915  CRef<CSeq_loc> add(new CSeq_loc(*id, loc_stop + 1, pos, strand));
916  add->SetPartialStop(partial_stop, eExtreme_Positional);
918  changed = true;
919  }
920  if (changed) {
921  loc.Assign(*new_loc);
922  }
923  return changed;
924 }
925 
926 
928 {
929  const CSeq_loc& loc = f.GetLocation();
930  CRef<CSeq_loc> new_loc;
931 
932  const CGenetic_code* code = NULL;
933  if (f.IsSetData() && f.GetData().IsCdregion() && f.GetData().GetCdregion().IsSetCode()) {
934  code = &(f.GetData().GetCdregion().GetCode());
935  }
936 
937  size_t stop = loc.GetStop(eExtreme_Biological);
938  // figure out if we have a partial codon at the end
939  size_t orig_len = sequence::GetLength(loc, &(bsh.GetScope()));
940  size_t len = orig_len;
941  if (frame == CCdregion::eFrame_not_set &&
942  f.IsSetData() && f.GetData().IsCdregion() &&
943  f.GetData().GetCdregion().IsSetFrame()) {
944  frame = f.GetData().GetCdregion().GetFrame();
945  }
946  if (frame == CCdregion::eFrame_two) {
947  len -= 1;
948  } else if (frame == CCdregion::eFrame_three) {
949  len -= 2;
950  }
951 
952  size_t mod = len % 3;
953  CRef<CSeq_loc> vector_loc(new CSeq_loc());
954  vector_loc->SetInt().SetId().Assign(*(bsh.GetId().front().GetSeqId()));
955 
956  if (loc.IsSetStrand() && loc.GetStrand() == eNa_strand_minus) {
957  vector_loc->SetInt().SetFrom(0);
958  vector_loc->SetInt().SetTo(stop + mod - 1);
959  vector_loc->SetStrand(eNa_strand_minus);
960  } else {
961  vector_loc->SetInt().SetFrom(stop - mod + 1);
962  vector_loc->SetInt().SetTo(bsh.GetInst_Length() - 1);
963  }
964 
965  CSeqVector seq(*vector_loc, bsh.GetScope(), CBioseq_Handle::eCoding_Iupac);
966  // reserve our space
967  size_t usable_size = seq.size();
968 
969  if (limit > 0 && usable_size > limit) {
970  usable_size = limit;
971  }
972 
973  // get appropriate translation table
974  const CTrans_table & tbl =
975  (code ? CGen_code_table::GetTransTable(*code) :
977 
978  // main loop through bases
979  CSeqVector::const_iterator start = seq.begin();
980 
981  size_t i;
982  size_t k;
983  size_t state = 0;
984  size_t length = usable_size / 3;
985 
986  for (i = 0; i < length; ++i) {
987  // loop through one codon at a time
988  for (k = 0; k < 3; ++k, ++start) {
989  state = tbl.NextCodonState(state, *start);
990  }
991 
992  if (tbl.GetCodonResidue(state) == '*') {
993  if (loc.IsMix()) {
994  new_loc.Reset(new CSeq_loc());
995  new_loc->SetMix();
996  }
997  CSeq_loc_CI it(loc);
998  CSeq_loc_CI it_next = it;
999  ++it_next;
1000  while (it_next) {
1001  CConstRef<CSeq_loc> this_loc = it.GetRangeAsSeq_loc();
1002  if (new_loc) {
1003  new_loc->Add(*this_loc);
1004  } else {
1005  new_loc.Reset(new CSeq_loc());
1006  new_loc->Assign(*this_loc);
1007  }
1008  it = it_next;
1009  ++it_next;
1010  }
1011  CRef<CSeq_loc> last_interval(new CSeq_loc());
1012  CConstRef<CSeq_loc> this_loc = it.GetRangeAsSeq_loc();
1013  size_t this_start = this_loc->GetStart(eExtreme_Positional);
1014  size_t this_stop = this_loc->GetStop(eExtreme_Positional);
1015  size_t extension = ((i + 1) * 3) - mod;
1016  last_interval->SetInt().SetId().Assign(*(this_loc->GetId()));
1017  if (this_loc->IsSetStrand() && this_loc->GetStrand() == eNa_strand_minus) {
1018  last_interval->SetStrand(eNa_strand_minus);
1019  last_interval->SetInt().SetFrom(this_start - extension);
1020  last_interval->SetInt().SetTo(this_stop);
1021  } else {
1022  last_interval->SetInt().SetFrom(this_start);
1023  last_interval->SetInt().SetTo(this_stop + extension);
1024  if (this_loc->IsSetStrand()) {
1025  last_interval->SetInt().SetStrand(this_loc->GetStrand());
1026  }
1027  }
1028 
1029  if (new_loc) {
1030  new_loc->Add(*last_interval);
1031  } else {
1032  new_loc.Reset(new CSeq_loc());
1033  new_loc->Assign(*last_interval);
1034  }
1036  new_loc->SetPartialStop(false, eExtreme_Biological);
1037  f.SetLocation().Assign(*new_loc);
1038  return true;
1039  }
1040  }
1041 
1042  bool rval = false;
1043  if (usable_size < 3 && limit == 0) {
1044  if (loc.GetStrand() == eNa_strand_minus) {
1045  rval = SeqLocExtend(f.SetLocation(), 0, bsh.GetScope());
1046  } else {
1047  rval = SeqLocExtend(f.SetLocation(), bsh.GetInst_Length() - 1, bsh.GetScope());
1048  }
1049  f.SetLocation().SetPartialStop(true, eExtreme_Biological);
1050  }
1051 
1052  return rval;
1053 }
1054 
1055 
1057 {
1058  bool changed = false;
1060  if (cds.GetData().GetCdregion().IsSetFrame()) {
1061  frame = cds.GetData().GetCdregion().GetFrame();
1062  }
1063 
1064  CCdregion::TFrame new_frame = CSeqTranslator::FindBestFrame(cds, scope);
1065  if (frame != new_frame) {
1066  cds.SetData().SetCdregion().SetFrame(new_frame);
1067  changed = true;
1068  }
1069  return changed;
1070 }
1071 
1072 // like C's function GetFrameFromLoc, but better
1074 {
1075  if (!loc.IsPartialStart(eExtreme_Biological)) {
1076  if (frame != CCdregion::eFrame_one) {
1077  frame = CCdregion::eFrame_one;
1078  return true;
1079  }
1080  return false;
1081  }
1083  // cannot make a determination if both ends are partial
1084  return false;
1085  }
1086 
1087  const TSeqPos seq_len = sequence::GetLength(loc, &scope);
1088 
1090 
1091  // have complete last codon, get frame from length
1092  switch( (seq_len % 3) + 1 ) {
1093  case 1:
1094  desired_frame = CCdregion::eFrame_one;
1095  break;
1096  case 2:
1097  desired_frame = CCdregion::eFrame_two;
1098  break;
1099  case 3:
1100  desired_frame = CCdregion::eFrame_three;
1101  break;
1102  default:
1103  // mathematically impossible
1104  _ASSERT(false);
1105  return false;
1106  }
1107  if (frame != desired_frame) {
1108  frame = desired_frame;
1109  return true;
1110  }
1111  return false;
1112 }
1113 
1114 
// Convenience overload: update the frame stored in `cdregion` from `loc`.
//
// Seeds `frame` with the cdregion's current frame when one is set, delegates
// to the SetFrameFromLoc(frame, loc, scope) overload, and writes the result
// back into `cdregion` only when that overload reports a change.
// Returns true if the cdregion's frame was updated, false otherwise.
//
// NOTE(review): the declaration of `frame` (listing line 1117) is missing
// from this listing, so its initial value when no frame is set cannot be
// confirmed from here.
1115 bool CCleanup::SetFrameFromLoc(CCdregion &cdregion, const CSeq_loc& loc, CScope& scope)
1116 {
1118  if (cdregion.IsSetFrame()) {
1119  frame = cdregion.GetFrame();
1120  }
1121  if (SetFrameFromLoc(frame, loc, scope)) {
1122  cdregion.SetFrame(frame);
1123  return true;
1124  } else {
1125  return false;
1126  }
1127 }
1128 
1129 
1130 bool IsTransSpliced(const CSeq_feat& feat)
1131 {
1132  if (feat.IsSetExcept_text() && NStr::Find(feat.GetExcept_text(), "trans-splicing") != string::npos) {
1133  return true;
1134  } else {
1135  return false;
1136  }
1137 }
1138 
1139 
1141 {
1142  const CGene_ref* gene = feat.GetGeneXref();
1143  if (gene && gene->IsSuppressed()) {
1144  return (CConstRef <CSeq_feat>());
1145  }
1146 
1147  if (gene) {
1149  bioseq_hl = sequence::GetBioseqFromSeqLoc(feat.GetLocation(), scope);
1150  if (!bioseq_hl) {
1151  return (CConstRef <CSeq_feat>());
1152  }
1153  CTSE_Handle tse_hl = bioseq_hl.GetTSE_Handle();
1154  if (gene->CanGetLocus_tag() && !(gene->GetLocus_tag().empty())) {
1156  seq_feat_hl = tse_hl.GetGeneWithLocus(gene->GetLocus_tag(), true);
1157  if (seq_feat_hl) {
1158  return (seq_feat_hl.GetOriginalSeq_feat());
1159  }
1160  } else if (gene->CanGetLocus() && !(gene->GetLocus().empty())) {
1162  seq_feat_hl = tse_hl.GetGeneWithLocus(gene->GetLocus(), false);
1163  if (seq_feat_hl) {
1164  return (seq_feat_hl.GetOriginalSeq_feat());
1165  }
1166  } else return (CConstRef <CSeq_feat>());
1167  } else {
1169  }
1170 
1171  return (CConstRef <CSeq_feat>());
1172 };
1173 
1174 
1175 bool CCleanup::IsPseudo(const CSeq_feat& feat, CScope& scope)
1176 {
1177  if (feat.IsSetPseudo() && feat.GetPseudo()) {
1178  return true;
1179  }
1180  if (feat.IsSetQual()) {
1181  ITERATE(CSeq_feat::TQual, it, feat.GetQual()) {
1182  if ((*it)->IsSetQual() && NStr::EqualNocase((*it)->GetQual(), "pseudogene")) {
1183  return true;
1184  }
1185  }
1186  }
1187  if (feat.GetData().IsGene()) {
1188  if (feat.GetData().GetGene().IsSetPseudo() && feat.GetData().GetGene().GetPseudo()) {
1189  return true;
1190  }
1191  } else {
1192  if (feat.IsSetXref()) {
1193  ITERATE(CSeq_feat::TXref, it, feat.GetXref()) {
1194  if ((*it)->IsSetData() && (*it)->GetData().IsGene() &&
1195  (*it)->GetData().GetGene().IsSetPseudo() &&
1196  (*it)->GetData().GetGene().GetPseudo()) {
1197  return true;
1198  }
1199  }
1200  }
1201  CConstRef<CSeq_feat> gene = GetGeneForFeature(feat, scope);
1202  if (gene && IsPseudo(*gene, scope)) {
1203  return true;
1204  }
1205  }
1206  return false;
1207 }
1208 
1209 
1211 {
1212  if (!f.GetData().IsCdregion()) {
1213  // not coding region
1214  return false;
1215  }
1216  if (IsPseudo(f, bsh.GetScope())) {
1217  return false;
1218  }
1220  return false;
1221  }
1222 
1223  if (check_for_stop) {
1224  string translation;
1225  try {
1226  CSeqTranslator::Translate(f, bsh.GetScope(), translation, true);
1227  } catch (CSeqMapException& e) {
1228  //unable to translate
1229  return false;
1230  } catch (CSeqVectorException& e) {
1231  //unable to translate
1232  return false;
1233  }
1234  if (NStr::EndsWith(translation, "*")) {
1235  //already has stop codon
1236  return false;
1237  }
1238  }
1239 
1240  return ExtendToStopCodon(f, bsh, 3);
1241 }
1242 
1243 
1244 void CCleanup::SetProteinName(CProt_ref& prot_ref, const string& protein_name, bool append)
1245 {
1246  if (append && prot_ref.IsSetName() &&
1247  prot_ref.GetName().size() > 0 &&
1248  !NStr::IsBlank(prot_ref.GetName().front())) {
1249  prot_ref.SetName().front() += "; " + protein_name;
1250  } else {
1251  prot_ref.ResetName();
1252  prot_ref.SetName().push_back(protein_name);
1253  }
1254 
1255 }
1256 
1257 
// Set (or append to) the protein name associated with coding region `cds`.
//
// Strategy, in order:
//   1. If `cds` has a product and the scope resolves it to a bioseq, the name
//      is applied to a copy of the first protein feature found on that bioseq,
//      and the copy replaces the original through an edit handle. If there is
//      no protein feature, feature::AddProteinFeature is called instead.
//   2. Otherwise, if `cds` already carries a protein xref, the name is applied
//      to the first such xref.
//   3. Otherwise a new protein xref holding `protein_name` is added to `cds`.
// `append` is forwarded to the CProt_ref overload of SetProteinName in cases
// 1 (existing feature) and 2; it is not used when a new feature or xref is made.
//
// NOTE(review): the loop header over the xrefs (listing line 1282) is missing
// from this listing; the `it` used below is declared there.
1258 void CCleanup::SetProteinName(CSeq_feat& cds, const string& protein_name, bool append, CScope& scope)
1259 {
1260  bool added = false;
1261  if (cds.IsSetProduct()) {
1262  CBioseq_Handle prot = scope.GetBioseqHandle(cds.GetProduct());
1263  if (prot) {
1264  // find main protein feature
1265  CFeat_CI feat_ci(prot, CSeqFeatData::eSubtype_prot);
1266  if (feat_ci) {
 // edit a copy, then swap it in via the edit handle
1267  CRef<CSeq_feat> new_prot(new CSeq_feat());
1268  new_prot->Assign(feat_ci->GetOriginalFeature());
1269  SetProteinName(new_prot->SetData().SetProt(), protein_name, append);
1270  CSeq_feat_EditHandle feh(feat_ci->GetSeq_feat_Handle());
1271  feh.Replace(*new_prot);
1272  } else {
1273  // make new protein feature
1274  feature::AddProteinFeature(*(prot.GetCompleteBioseq()), protein_name, cds, scope);
1275  }
1276  added = true;
1277  }
1278  }
 // no usable product bioseq: fall back to a protein xref on the cds itself
1279  if (!added) {
1280  if (cds.IsSetXref()) {
1281  // see if this seq-feat already has a prot xref
1283  if ((*it)->IsSetData() && (*it)->GetData().IsProt()) {
1284  SetProteinName((*it)->SetData().SetProt(), protein_name, append);
1285  added = true;
1286  break;
1287  }
1288  }
1289  }
1290  if (!added) {
 // no protein xref yet: create one carrying just the new name
1291  CRef<CSeqFeatXref> xref(new CSeqFeatXref());
1292  xref->SetData().SetProt().SetName().push_back(protein_name);
1293  cds.SetXref().push_back(xref);
1294  }
1295  }
1296 }
1297 
1298 
1299 const string& CCleanup::GetProteinName(const CProt_ref& prot)
1300 {
1301  if (prot.IsSetName() && !prot.GetName().empty()) {
1302  return prot.GetName().front();
1303  } else {
1304  return kEmptyStr;
1305  }
1306 }
1307 
1308 
// Look up the protein name for a coding region.
//
// The product sequence is consulted first (when the product is set and
// resolves in scope); failing that, the first Prot xref on the CDS is used.
// Returns kEmptyStr when neither source yields a protein reference.
1309 const string& CCleanup::GetProteinName(const CSeq_feat& cds, CScope& scope)
1310 {
1311  if (cds.IsSetProduct()) {
1312  CBioseq_Handle prot = scope.GetBioseqHandle(cds.GetProduct());
1313  if (prot) {
      // NOTE(review): `f` is presumably a feature iterator over protein
      // features of `prot` -- confirm against its declaration.
1315  if (f) {
1316  return GetProteinName(f->GetData().GetProt());
1317  }
1318  }
1319  }
      // no usable product feature: fall back to a Prot xref on the CDS
1320  if (cds.IsSetXref()) {
1321  ITERATE(CSeq_feat::TXref, it, cds.GetXref()) {
1322  if ((*it)->IsSetData() && (*it)->GetData().IsProt()) {
1323  return GetProteinName((*it)->GetData().GetProt());
1324  }
1325  }
1326  }
1327  return kEmptyStr;
1328 }
1329 
1330 
1332 {
      // Adjusts the partial flags of the coding region `cds` and returns
      // true when any flag was changed.  Two sources of evidence are used:
      // the reading frame, and the translation of the CDS.
1333  bool any_change = false;
1334 
      // frame-based check: marks the biological start as partial
1336  cds.GetData().GetCdregion().IsSetFrame() &&
1339  cds.SetLocation().SetPartialStart(true, eExtreme_Biological);
1340  any_change = true;
1341  }
1342 
1344  // look for start and stop codon
1345  string transl_prot;
1346  try {
1347  CSeqTranslator::Translate(cds, scope, transl_prot,
1348  true, // include stop codons
1349  false); // do not remove trailing X/B/Z
1350 
1351  } catch (const runtime_error&) {
      // translation failure is ignored; transl_prot stays empty and the
      // translation-based checks below are skipped
1352  }
1353  if (!NStr::IsBlank(transl_prot)) {
      // no leading "M": treat the start as partial
1354  if (!cds.GetLocation().IsPartialStart(eExtreme_Biological) && !NStr::StartsWith(transl_prot, "M")) {
1355  cds.SetLocation().SetPartialStart(true, eExtreme_Biological);
1356  any_change = true;
1357  }
      // no trailing "*": treat the stop as partial
1358  if (!cds.GetLocation().IsPartialStop(eExtreme_Biological) && !NStr::EndsWith(transl_prot, "*")) {
1359  cds.SetLocation().SetPartialStop(true, eExtreme_Biological);
1360  any_change = true;
1361  }
1362  }
1363  }
1364 
1366 
1367  return any_change;
1368 }
1369 
1370 
// Clear partial flags that sit in the interior of a multi-part location.
//
// Dispatches on the location type to the overloads for mixes and packed
// intervals; every other location type is left untouched.  is_first/is_last
// tell the callee whether this location is the first/last piece of an
// enclosing location.  Returns true if any flag was cleared.
1371 bool CCleanup::ClearInternalPartials(CSeq_loc& loc, bool is_first, bool is_last)
1372 {
1373  bool rval = false;
1374  switch (loc.Which()) {
1375  case CSeq_loc::e_Mix:
1376  rval |= ClearInternalPartials(loc.SetMix(), is_first, is_last);
1377  break;
1379  rval |= ClearInternalPartials(loc.SetPacked_int(), is_first, is_last);
1380  break;
1381  default:
      // nothing to clear for other location types
1382  break;
1383  }
1384  return rval;
1385 }
1386 
1387 
// Clear interior partial flags on the parts of a location mix.
//
// Nested mixes and packed intervals are handled recursively.  For any other
// part, the partial-start flag is cleared unless the part is the first one,
// and the partial-stop flag is cleared unless the part is the last one.
// Returns true if any flag was cleared.
1388 bool CCleanup::ClearInternalPartials(CSeq_loc_mix& mix, bool is_first, bool is_last)
1389 {
1390  bool rval = false;
      // a part counts as "last" only if the mix itself is last and the part
      // is the final element of the mix
1392  bool this_is_last = is_last && (*it == mix.Set().back());
1393  if ((*it)->IsMix() || (*it)->IsPacked_int()) {
1394  rval |= ClearInternalPartials(**it, is_first, this_is_last);
1395  } else {
1396  if (!is_first &&
1397  (*it)->IsPartialStart(eExtreme_Biological)) {
1398  (*it)->SetPartialStart(false, eExtreme_Biological);
1399  rval = true;
1400  }
1401  if (!this_is_last &&
1402  (*it)->IsPartialStop(eExtreme_Biological)) {
1403  (*it)->SetPartialStop(false, eExtreme_Biological);
1404  rval = true;
1405  }
1406  }
      // every part after the first one is interior at its start
1407  is_first = false;
1408  }
1409  return rval;
1410 }
1411 
1412 
// Clear interior partial flags on the intervals of a packed interval list.
//
// The partial-start flag is cleared on every interval except the first
// (when is_first is true on entry) and the partial-stop flag on every
// interval except the last (when is_last is true).  Returns true if any
// flag was cleared.
1413 bool CCleanup::ClearInternalPartials(CPacked_seqint& pint, bool is_first, bool is_last)
1414 {
1415  bool rval = false;
1416 
      // "last" requires both the caller's is_last and being the final interval
1418  bool this_is_last = is_last && (*it == pint.Set().back());
1419  if (!is_first && (*it)->IsPartialStart(eExtreme_Biological)) {
1420  (*it)->SetPartialStart(false, eExtreme_Biological);
1421  rval = true;
1422  }
1423  if (!this_is_last && (*it)->IsPartialStop(eExtreme_Biological)) {
1424  (*it)->SetPartialStop(false, eExtreme_Biological);
1425  rval = true;
1426  }
      // only the very first interval keeps its partial start
1427  is_first = false;
1428  }
1429  return rval;
1430 }
1431 
1432 
1434 {
1435  bool rval = false;
1436  CFeat_CI f(seh);
1437  while (f) {
1438  CRef<CSeq_feat> new_feat(new CSeq_feat());
1439  new_feat->Assign(*(f->GetSeq_feat()));
1440  if (ClearInternalPartials(new_feat->SetLocation())) {
1442  eh.Replace(*new_feat);
1443  }
1444  ++f;
1445  }
1446 
1447  return rval;
1448 }
1449 
1450 
1452 {
      // Cleans each EC number in ec_num_list in place and, where
      // CProt_ref::GetECNumberReplacement supplies a non-blank value,
      // substitutes that replacement.  Returns true if any entry changed.
1453  bool changed = false;
1454  // CProt_ref::TEc is a list, so the iterator stays valid even if we
1455  // add new entries after the current one
1456  NON_CONST_ITERATE(CProt_ref::TEc, ec_num_iter, ec_num_list) {
1457  string & ec_num = *ec_num_iter;
      // a change in length is used as the signal that junk was stripped
1458  size_t tlen = ec_num.length();
1459  CleanVisStringJunk(ec_num);
1460  if (tlen != ec_num.length()) {
1461  changed = true;
1462  }
      // split EC numbers are left alone; others may be replaced
1464  !CProt_ref::IsECNumberSplit(ec_num)) {
1465  string new_val = CProt_ref::GetECNumberReplacement(ec_num);
1466  if (!NStr::IsBlank(new_val)) {
1467  ec_num = new_val;
1468  changed = true;
1469  }
1470  }
1471 
1472  }
1473  return changed;
1474 }
1475 
1477 {
1478  CBioseq_Handle bh = scope.GetBioseqHandle(gene.GetLocation());
1479  if (!bh) {
1480  return false;
1481  }
1482  CFeat_CI under(scope, gene.GetLocation());
1483  size_t longest = 0;
1484  CConstRef<CSeq_feat> longest_feat(NULL);
1485 
1486  while (under) {
1487  // ignore genes
1488  if (under->GetData().IsGene()) {
1489 
1490  } else {
1491  // must be contained in gene location
1492  sequence::ECompare loc_cmp = sequence::Compare(gene.GetLocation(), under->GetLocation(), &scope, sequence::fCompareOverlapping);
1493 
1494  if (loc_cmp == sequence::eSame || loc_cmp == sequence::eContains) {
1495  size_t len = sequence::GetLength(under->GetLocation(), &scope);
1496  // if longer than longest, record new length and feature
1497  if (len > longest) {
1498  longest_feat.Reset(under->GetSeq_feat());
1499  }
1500  }
1501  }
1502 
1503  ++under;
1504  }
1505  bool changed = false;
1506  if (longest_feat) {
1507  changed = feature::CopyFeaturePartials(gene, *longest_feat);
1508  }
1509  return changed;
1510 }
1511 
1512 
1514 {
      // Ensures the sequence `bsh` carries a MolInfo descriptor whose tech
      // equals `tech`.  Returns false when the existing value already
      // matches, true when a descriptor was modified or added.
1516  if (di) {
1517  if (di->GetMolinfo().IsSetTech() && di->GetMolinfo().GetTech() == tech) {
1518  // no change necessary
1519  return false;
1520  } else {
      // the iterator exposes the descriptor as const; constness is cast
      // away to update it in place
1521  CSeqdesc* d = const_cast<CSeqdesc*>(&(*di));
1522  d->SetMolinfo().SetTech(tech);
1523  return true;
1524  }
1525  }
      // no MolInfo descriptor yet: build one and attach it to the sequence
1526  CRef<CSeqdesc> m(new CSeqdesc());
1527  m->SetMolinfo().SetTech(tech);
1528  if (bsh.IsSetInst() && bsh.GetInst().IsSetMol() && bsh.IsAa()) {
1530  }
1531  CBioseq_EditHandle eh = bsh.GetEditHandle();
1532  eh.AddSeqdesc(*m);
1533  return true;
1534 }
1535 
1536 
1538 {
1540  if (di) {
1541  if (di->GetMolinfo().IsSetTech() && di->GetMolinfo().GetBiomol() == biomol) {
1542  // no change necessary
1543  return false;
1544  } else {
1545  CSeqdesc* d = const_cast<CSeqdesc*>(&(*di));
1546  d->SetMolinfo().SetBiomol(biomol);
1547  return true;
1548  }
1549  }
1550  CRef<CSeqdesc> m(new CSeqdesc());
1551  m->SetMolinfo().SetBiomol(biomol);
1552  CBioseq_EditHandle eh = bsh.GetEditHandle();
1553  eh.AddSeqdesc(*m);
1554  return true;
1555 }
1556 
1557 
// Make sure a sequence has a MolInfo descriptor.
//
// Sequences without Inst/Mol are skipped (returns false).  If a MolInfo
// descriptor already exists, a protein whose biomol is unset or unknown gets
// biomol "peptide".  If none exists, a descriptor is created for proteins,
// and for RNA sequences when is_product is true.
//
// Returns whether no MolInfo descriptor was present on entry.
// NOTE(review): this means true is returned even when nothing is added
// (e.g. a non-product nucleotide), and false is returned when an existing
// descriptor's biomol is fixed up -- confirm callers expect this.
1558 bool CCleanup::AddMissingMolInfo(CBioseq& seq, bool is_product)
1559 {
1560  if (!seq.IsSetInst() || !seq.GetInst().IsSetMol()) {
1561  return false;
1562  }
1563  bool needs_molinfo = true;
1564 
1565  if (seq.IsSetDescr()) {
1567  if ((*it)->IsMolinfo()) {
1568  needs_molinfo = false;
      // proteins with missing/unknown biomol are normalized to peptide
1569  if (seq.IsAa() &&
1570  (!(*it)->GetMolinfo().IsSetBiomol() ||
1571  (*it)->GetMolinfo().GetBiomol() == CMolInfo::eBiomol_unknown)) {
1572  (*it)->SetMolinfo().SetBiomol(CMolInfo::eBiomol_peptide);
1573  }
1574  }
1575  }
1576  }
1577  if (needs_molinfo) {
1578  if (seq.IsAa()) {
1579  CRef<CSeqdesc> m(new CSeqdesc());
1581  if (is_product) {
1583  }
1584  seq.SetDescr().Set().push_back(m);
1585  } else if (seq.GetInst().GetMol() == CSeq_inst::eMol_rna && is_product) {
1586  CRef<CSeqdesc> m(new CSeqdesc());
1589  seq.SetDescr().Set().push_back(m);
1590  }
1591  }
1592 
1593  return needs_molinfo;
1594 }
1595 
1596 
1598 {
1599  if (!bsh.IsSetInst() || !bsh.GetInst().IsSetMol() || !bsh.IsAa()) {
1600  return false;
1601  }
1602  if (bsh.IsSetId()) {
1603  ITERATE(CBioseq_Handle::TId, it, bsh.GetId()) {
1604  // do not add titles for sequences with certain IDs
1605  switch (it->Which()) {
1606  case CSeq_id::e_Pir:
1607  case CSeq_id::e_Swissprot:
1608  case CSeq_id::e_Patent:
1609  case CSeq_id::e_Prf:
1610  case CSeq_id::e_Pdb:
1611  return false;
1612  break;
1613  default:
1614  break;
1615  }
1616  }
1617  }
1618 
1619  string new_defline = sequence::CDeflineGenerator().GenerateDefline(bsh, sequence::CDeflineGenerator::fIgnoreExisting);
1620  if (bsh.IsSetDescr()) {
1621  ITERATE(CBioseq_set::TDescr::Tdata, title_d, bsh.GetDescr().Get()) {
1622  if ((*title_d)->IsTitle()) {
1623  if (!NStr::Equal((*title_d)->GetTitle(), new_defline)) {
1624  CSeqdesc* d = const_cast<CSeqdesc*>(title_d->GetPointer());
1625  d->SetTitle(new_defline);
1626  return true;
1627  } else {
1628  return false;
1629  }
1630  }
1631  }
1632  }
1633 
1634  CRef<CSeqdesc> t(new CSeqdesc());
1635  t->SetTitle(new_defline);
1636  CBioseq_EditHandle eh = bsh.GetEditHandle();
1637  eh.AddSeqdesc(*t);
1638  return true;
1639 }
1640 
1641 
1643 {
      // Removes every User descriptor whose object type is
      // CUser_object::eObjectType_Cleanup from seq_entry and, recursively,
      // from the members of a set.  Returns true if anything was removed.
1644  bool rval = false;
1645  if (seq_entry.IsSetDescr()) {
1646  CBioseq::TDescr::Tdata::iterator it = seq_entry.SetDescr().Set().begin();
1647  while (it != seq_entry.SetDescr().Set().end()) {
1648  if ((*it)->IsUser() && (*it)->GetUser().GetObjectType() == CUser_object::eObjectType_Cleanup){
      // erase() hands back the next iterator, so no increment here
1649  it = seq_entry.SetDescr().Set().erase(it);
1650  rval = true;
1651  }
1652  else {
1653  ++it;
1654  }
1655  }
      // drop the descriptor container entirely once it is empty
1656  if (seq_entry.SetDescr().Set().empty()) {
1657  if (seq_entry.IsSeq()) {
1658  seq_entry.SetSeq().ResetDescr();
1659  }
1660  else if (seq_entry.IsSet()) {
1661  seq_entry.SetSet().ResetDescr();
1662  }
1663  }
1664  }
      // recurse into the members of a set
1665  if (seq_entry.IsSet() && seq_entry.GetSet().IsSetSeq_set()) {
1667  rval |= RemoveNcbiCleanupObject(**it);
1668  }
1669  }
1670  return rval;
1671 }
1672 
1673 
// Collect pointers to all Source descriptors that have an Org set, from the
// given entry and, recursively, from the members of a set.  Results are
// appended to src_descs; the pointers refer to descriptors owned by `se`.
1674 void GetSourceDescriptors(const CSeq_entry& se, vector<const CSeqdesc* >& src_descs)
1675 {
1676  if (se.IsSetDescr()) {
1678  if ((*it)->IsSource() && (*it)->GetSource().IsSetOrg()) {
1679  src_descs.push_back(*it);
1680  }
1681  }
1682  }
1683 
      // descend into nested entries
1684  if (se.IsSet() && se.GetSet().IsSetSeq_set()) {
1686  GetSourceDescriptors(**it, src_descs);
1687  }
1688  }
1689 }
1690 
1691 
1693 {
      // Sends the Org-refs of all source descriptors and source features
      // under `seh` to CTaxon3 and overwrites each one whose reply differs.
      // Returns true if any descriptor or feature was updated.
1694  bool any_changes = false;
1695 
      // rq_list holds the requests in the order: descriptors, then features;
      // the replies are consumed in that same order below
1696  vector<CRef<COrg_ref> > rq_list;
1697  vector<const CSeqdesc* > src_descs;
1698  vector<CConstRef<CSeq_feat> > src_feats;
1699 
1700  GetSourceDescriptors(*(seh.GetCompleteSeq_entry()), src_descs);
1701  vector<const CSeqdesc* >::iterator desc_it = src_descs.begin();
1702  while (desc_it != src_descs.end()) {
1703  // add org ref for descriptor to request list
1704  CRef<COrg_ref> org(new COrg_ref());
1705  org->Assign((*desc_it)->GetSource().GetOrg());
1706  rq_list.push_back(org);
1707 
1708  ++desc_it;
1709  }
1710 
1712  while (feat) {
1713  if (feat->GetData().GetBiosrc().IsSetOrg()) {
1714  // add org ref for feature to request list
1715  CRef<COrg_ref> org(new COrg_ref());
1716  org->Assign(feat->GetData().GetBiosrc().GetOrg());
1717  rq_list.push_back(org);
1718  // add feature to list
1719  src_feats.push_back(feat->GetOriginalSeq_feat());
1720  }
1721  ++feat;
1722  }
1723 
1724  if (rq_list.size() > 0) {
1725  CTaxon3 taxon3;
1726  taxon3.Init();
1727  CRef<CTaxon3_reply> reply = taxon3.SendOrgRefList(rq_list);
1728  if (reply) {
1729  CTaxon3_reply::TReply::const_iterator reply_it = reply->GetReply().begin();
1730 
1731  // process descriptor responses
1732  desc_it = src_descs.begin();
1733 
1734  while (reply_it != reply->GetReply().end()
1735  && desc_it != src_descs.end()) {
1736  if ((*reply_it)->IsData() &&
1737  !(*desc_it)->GetSource().GetOrg().Equals((*reply_it)->GetData().GetOrg())) {
1738  any_changes = true;
      // descriptors were collected as const pointers; constness is cast
      // away to update them in place
1739  CSeqdesc* desc = const_cast<CSeqdesc*>(*desc_it);
1740  desc->SetSource().SetOrg().Assign((*reply_it)->GetData().GetOrg());
1741  desc->SetSource().SetOrg().ResetSyn();
1742  }
1743  ++reply_it;
1744  ++desc_it;
1745  }
1746 
1747  // process feature responses
      // NOTE(review): unlike descriptors, the feature path does not call
      // ResetSyn() on the updated Org -- confirm whether that is intended.
1748  vector<CConstRef<CSeq_feat> >::iterator feat_it = src_feats.begin();
1749  while (reply_it != reply->GetReply().end()
1750  && feat_it != src_feats.end()) {
1751  if ((*reply_it)->IsData() &&
1752  !(*feat_it)->GetData().GetBiosrc().GetOrg().Equals((*reply_it)->GetData().GetOrg())) {
1753  any_changes = true;
      // features are replaced through an edit handle rather than edited
1754  CRef<CSeq_feat> new_feat(new CSeq_feat());
1755  new_feat->Assign(**feat_it);
1756  new_feat->SetData().SetBiosrc().SetOrg().Assign((*reply_it)->GetData().GetOrg());
1757  CSeq_feat_Handle fh = seh.GetScope().GetSeq_featHandle(**feat_it);
1758  CSeq_feat_EditHandle efh(fh);
1759  efh.Replace(*new_feat);
1760  }
1761  ++reply_it;
1762  ++feat_it;
1763  }
1764  }
1765  }
1766 
1767  return any_changes;
1768 }
1769 
1771 {
      // Translates `cds` into a new protein Bioseq and attaches it next to
      // the nucleotide sequence, wrapping that sequence in a nuc-prot set
      // first when necessary.  Returns the new protein entry, or a null
      // reference when the CDS location does not resolve to a sequence.
1772  CBioseq_Handle cds_bsh = scope.GetBioseqHandle(cds.GetLocation());
1773  if (!cds_bsh) {
1774  return CRef<CSeq_entry>(NULL);
1775  }
1776  CSeq_entry_Handle seh = cds_bsh.GetSeq_entry_Handle();
1777  if (!seh) {
1778  return CRef<CSeq_entry>(NULL);
1779  }
1780 
1781  CRef<CBioseq> new_product = CSeqTranslator::TranslateToProtein(cds, scope);
1782  CRef<CSeqdesc> molinfo(new CSeqdesc());
1785  new_product->SetDescr().Set().push_back(molinfo);
1786 
      // the new protein takes over the ID named by the CDS product
      // NOTE(review): cds.GetProduct().GetId() is dereferenced without a
      // null check -- confirm it cannot be null for the products seen here.
1787  if (cds.IsSetProduct()) {
1788  CRef<CSeq_id> prot_id(new CSeq_id());
1789  prot_id->Assign(*(cds.GetProduct().GetId()));
1790  new_product->SetId().push_back(prot_id);
1791  }
1792  CRef<CSeq_entry> prot_entry(new CSeq_entry());
1793  prot_entry->SetSeq(*new_product);
1794 
      // if the nucleotide already lives in a nuc-prot set, attach there
1796  if (!eh.IsSet()) {
1797  CBioseq_set_Handle nuc_parent = eh.GetParentBioseq_set();
1798  if (nuc_parent && nuc_parent.IsSetClass() && nuc_parent.GetClass() == objects::CBioseq_set::eClass_nuc_prot) {
1799  eh = nuc_parent.GetParentEntry().GetEditHandle();
1800  }
1801  }
      // otherwise convert the nucleotide entry into a new nuc-prot set
1802  if (!eh.IsSet()) {
1803  eh.ConvertSeqToSet();
1804  // move all descriptors on nucleotide sequence except molinfo, title, and create-date to set
1805  eh.SetSet().SetClass(CBioseq_set::eClass_nuc_prot);
1807  if (set && set->IsSetSeq_set()) {
1808  CConstRef<CSeq_entry> nuc = set->GetSeq_set().front();
1810  CBioseq_set::TDescr::Tdata::const_iterator it = nuc->GetDescr().Get().begin();
1811  while (it != nuc->GetDescr().Get().end()) {
1812  if (!(*it)->IsMolinfo() && !(*it)->IsTitle() && !(*it)->IsCreate_date()) {
1813  CRef<CSeqdesc> copy(new CSeqdesc());
1814  copy->Assign(**it);
1815  eh.AddSeqdesc(*copy);
1816  neh.RemoveSeqdesc(**it);
      // removal changes the descriptor list, so scanning restarts from
      // the beginning (or stops if no descriptors remain)
1817  if (nuc->IsSetDescr()) {
1818  it = nuc->GetDescr().Get().begin();
1819  }
1820  else {
1821  break;
1822  }
1823  }
1824  else {
1825  ++it;
1826  }
1827  }
1828  }
1829  }
1830 
      // `added` is not used afterwards; the caller receives prot_entry
1831  CSeq_entry_EditHandle added = eh.AttachEntry(*prot_entry);
1832  return prot_entry;
1833 }
1834 
1835 
1836 CRef<objects::CSeq_id> GetNewProteinId(objects::CSeq_entry_Handle seh, objects::CBioseq_Handle bsh)
1837 {
1838  string id_base;
1839  objects::CSeq_id_Handle hid;
1840 
1841  ITERATE(objects::CBioseq_Handle::TId, it, bsh.GetId()) {
1842  if (!hid || !it->IsBetter(hid)) {
1843  hid = *it;
1844  }
1845  }
1846 
1847  hid.GetSeqId()->GetLabel(&id_base, objects::CSeq_id::eContent);
1848 
1849  int offset = 1;
1850  string id_label = id_base + "_" + NStr::NumericToString(offset);
1851  CRef<objects::CSeq_id> id(new objects::CSeq_id());
1852  id->SetLocal().SetStr(id_label);
1853  objects::CBioseq_Handle b_found = seh.GetBioseqHandle(*id);
1854  while (b_found) {
1855  offset++;
1856  id_label = id_base + "_" + NStr::NumericToString(offset);
1857  id->SetLocal().SetStr(id_label);
1858  b_found = seh.GetBioseqHandle(*id);
1859  }
1860  return id;
1861 }
1862 
1863 
1865 {
      // Makes the genetic code of the coding regions selected on a
      // nucleotide sequence agree with the code from the sequence's source
      // descriptor.  Returns true if any feature was replaced.
1866  if (!bsh) {
1867  return false;
1868  }
1869  if (!bsh.IsNa()) {
1870  return false;
1871  }
1872 
      // stays 0 when the sequence has no source descriptor
1873  int bioseqGenCode = 0;
1874  CSeqdesc_CI src(bsh, CSeqdesc::e_Source);
1875  if (src) {
1876  bioseqGenCode = src->GetSource().GetGenCode();
1877  }
1878 
1879  bool any_changed = false;
1880  // set Cdregion's gcode from BioSource (unless except-text)
1882  CFeat_CI feat_ci(bsh, sel);
1883  for (; feat_ci; ++feat_ci) {
1884  const CSeq_feat& feat = feat_ci->GetOriginalFeature();
1885  const CCdregion& cds = feat.GetData().GetCdregion();
      // an unset code is treated as 0 for the comparison
1886  int cdregionGenCode = (cds.IsSetCode() ?
1887  cds.GetCode().GetId() :
1888  0);
1889  if (cdregionGenCode != bioseqGenCode)
1890  {
1891  // make cdregion's gencode match bioseq's gencode,
1892  // if allowed
1893  if (!feat.HasExceptionText("genetic code exception"))
1894  {
      // edit a copy and swap it in through the edit handle
1895  CRef<CSeq_feat> new_feat(new CSeq_feat);
1896  new_feat->Assign(feat);
1897  CCdregion& new_cds = new_feat->SetData().SetCdregion();
1898  new_cds.ResetCode();
1899  new_cds.SetCode().SetId(bioseqGenCode);
1900  CSeq_feat_EditHandle edit_handle(*feat_ci);
1901  edit_handle.Replace(*new_feat);
1902  any_changed = true;
1903  }
1904  }
1905  }
1906  return any_changed;
1907 }
1908 
1909 
1910 // return position of " [" + sOrganism + "]", but only if it's
1911 // at the end and there are characters before it.
1912 // Also, returns the position of the organelle prefix in the title.
      // Returns NPOS when the pattern is not found or sits at position 0.
      // out_piOrganellePos may be NULL; when given, it is reset to NPOS and
      // then set to the position of a " (<organelle>)" text that ends right
      // where the organism pattern begins.
1914  const string & sTitle,
1915  const string & sOrganism,
1916  SIZE_TYPE * out_piOrganellePos)
1917 {
1918  if (out_piOrganellePos) {
1919  *out_piOrganellePos = NPOS;
1920  }
1921 
1922  SIZE_TYPE answer = NPOS;
1923 
1924  const string sPattern = " [" + sOrganism + "]";
1925  if (NStr::EndsWith(sTitle, sPattern, NStr::eNocase)) {
1926  answer = sTitle.length() - sPattern.length();
1927  if (answer < 1) {
1928  // title must have something before the pattern
1929  answer = NPOS;
1930  }
1931  } else {
      // not at the very end: fall back to the last case-insensitive match
1932  answer = NStr::FindNoCase(sTitle, sPattern, 0, NPOS, NStr::eLast);
1933  if (answer < 1 || answer == NPOS) {
1934  // pattern not found
1935  answer = NPOS;
1936  }
1937  }
1938 
1939  // find organelle prefix
      // NOTE(review): this runs even when answer is NPOS; the CTempString
      // below is then built with length NPOS -- presumably that covers the
      // whole title; confirm.
1940  if (out_piOrganellePos) {
1941  for (unsigned int genome = CBioSource::eGenome_chloroplast;
1943  genome++) {
1944  if (genome != CBioSource::eGenome_extrachrom &&
1945  genome != CBioSource::eGenome_transposon &&
1947  genome != CBioSource::eGenome_proviral &&
1948  genome != CBioSource::eGenome_virion &&
1950  {
1951  string organelle = " (" + CBioSource::GetOrganelleByGenome(genome) + ")";
1952  SIZE_TYPE possible_organelle_start_pos = NStr::Find(sTitle, organelle);
      // accept only if the text before the organism pattern ends with
      // the organelle text
1953  if (possible_organelle_start_pos != NPOS &&
1954  NStr::EndsWith(CTempString(sTitle, 0, answer), organelle)) {
1955  *out_piOrganellePos = possible_organelle_start_pos;
1956  break;
1957  }
1958 
1959  }
1960  }
1961  }
1962 
1963  return answer;
1964 }
1965 
// Strip a trailing "[taxname]" from the names of the protein features
// annotated directly on `seq`.  Nothing happens when taxname is empty.
1966 static void s_RemoveOrgFromEndOfProtein(CBioseq& seq, string taxname)
1967 
1968 {
1969  if (taxname.empty()) return;
1970  SIZE_TYPE taxlen = taxname.length();
1971 
1972  EDIT_EACH_SEQANNOT_ON_BIOSEQ(annot_it, seq) {
1973  CSeq_annot& annot = **annot_it;
1974  if (!annot.IsFtable()) continue;
1975  EDIT_EACH_FEATURE_ON_ANNOT(feat_it, annot) {
1976  CSeq_feat& feat = **feat_it;
1977  CSeqFeatData& data = feat.SetData();
1978  if (!data.IsProt()) continue;
1979  CProt_ref& prot_ref = data.SetProt();
1980  EDIT_EACH_NAME_ON_PROTREF(it, prot_ref) {
      // work on a copy; it is written back only when changed
1981  string str = *it;
1982  if (str.empty()) continue;
1983  int len = str.length();
1984  if (len < 5) continue;
      // the name must end with ']' and contain a '['
1985  if (str[len - 1] != ']') continue;
1986  SIZE_TYPE cp = NStr::Find(str, "[", 0, NPOS, NStr::eLast);
1987  if (cp == NPOS) continue;
      // suffix is everything after the last '[', including the ']'
1988  string suffix = str.substr(cp + 1);
      // bracketed text starting with "NAD" is never removed
1989  if (NStr::StartsWith(suffix, "NAD")) continue;
      // the bracket must hold exactly the taxname plus the closing ']'
1990  if (suffix.length() != taxlen + 1) continue;
1991  if (NStr::StartsWith(suffix, taxname)) {
1992  str.erase(cp);
1994  *it = str;
1995  }
1996  }
1997  }
1998  }
1999 }
2000 
2002 {
      // Rewrites the title of a protein Bioseq so that it ends with
      // ", partial" (only when the sequence's MolInfo says it is partial),
      // an optional " (<organelle>)" and " [<taxname>]".
      // Returns true only when the title text actually changed.
      // As a side step, s_RemoveOrgFromEndOfProtein is applied to the
      // protein feature names before the title is examined.
2003  // Bail if not protein
2004  if (!FIELD_CHAIN_OF_2_IS_SET(bioseq, Inst, Mol) ||
2005  bioseq.GetInst().GetMol() != NCBI_SEQMOL(aa))
2006  {
2007  return false;
2008  }
2009 
2010  // Bail if record is swissprot
2011  FOR_EACH_SEQID_ON_BIOSEQ(seqid_itr, bioseq) {
2012  const CSeq_id& seqid = **seqid_itr;
2013  if (FIELD_IS(seqid, Swissprot)) {
2014  return false;
2015  }
2016  }
2017 
2018  // gather some info from the Seqdesc's on the bioseq, into
2019  // the following variables
2020  bool bPartial = false;
2021  string sTaxname;
2022  string sOldName;
2023  string *psTitle = NULL;
2024  string organelle = kEmptyStr;
2025 
2026  // iterate for title
      // no break: if several titles exist, the last one is the one edited
2027  EDIT_EACH_SEQDESC_ON_BIOSEQ(descr_iter, bioseq) {
2028  CSeqdesc &descr = **descr_iter;
2029  if (descr.IsTitle()) {
2030  psTitle = &GET_MUTABLE(descr, Title);
2031  }
2032  }
2033 
2034  // iterate Seqdescs from bottom to top
2035  // accumulate seqdescs into here
2036  typedef vector< CConstRef<CSeqdesc> > TSeqdescVec;
2037  TSeqdescVec vecSeqdesc;
2038  {
2039  FOR_EACH_SEQDESC_ON_BIOSEQ(descr_iter, bioseq) {
2040  vecSeqdesc.push_back(CConstRef<CSeqdesc>(&**descr_iter));
2041  }
2042  // climb up to get parent Seqdescs
2043  CConstRef<CBioseq_set> bioseq_set(bioseq.GetParentSet());
2044  for (; bioseq_set; bioseq_set = bioseq_set->GetParentSet()) {
2045  FOR_EACH_SEQDESC_ON_SEQSET(descr_iter, *bioseq_set) {
2046  vecSeqdesc.push_back(CConstRef<CSeqdesc>(&**descr_iter));
2047  }
2048  }
2049  }
2050 
      // partial status comes from the first MolInfo with Completeness set
2051  ITERATE(TSeqdescVec, descr_iter, vecSeqdesc) {
2052  const CSeqdesc &descr = **descr_iter;
2053  if (descr.IsMolinfo() && FIELD_IS_SET(descr.GetMolinfo(), Completeness)) {
2054  switch (GET_FIELD(descr.GetMolinfo(), Completeness)) {
2055  case NCBI_COMPLETENESS(partial):
2056  case NCBI_COMPLETENESS(no_left):
2057  case NCBI_COMPLETENESS(no_right):
2058  case NCBI_COMPLETENESS(no_ends):
2059  bPartial = true;
2060  break;
2061  default:
2062  break;
2063  }
2064  // stop at first molinfo
2065  break;
2066  }
2067  }
2068 
      // organelle, taxname and old name come from the first Source found
2069  ITERATE(TSeqdescVec, descr_iter, vecSeqdesc) {
2070  const CSeqdesc &descr = **descr_iter;
2071  if (descr.IsSource()) {
2072  const TBIOSOURCE_GENOME genome = (descr.GetSource().CanGetGenome() ?
2073  descr.GetSource().GetGenome() :
2075  if (genome >= CBioSource::eGenome_chloroplast &&
2077  genome != CBioSource::eGenome_extrachrom &&
2078  genome != CBioSource::eGenome_transposon &&
2080  genome != CBioSource::eGenome_proviral &&
2081  genome != CBioSource::eGenome_virion &&
2083  {
2084  organelle = CBioSource::GetOrganelleByGenome(genome);
2085  }
2086 
2087  if (FIELD_IS_SET(descr.GetSource(), Org)) {
2088  const COrg_ref & org = GET_FIELD(descr.GetSource(), Org);
2089  if (!RAW_FIELD_IS_EMPTY_OR_UNSET(org, Taxname)) {
2090  sTaxname = GET_FIELD(org, Taxname);
2091  }
      // the organelle is dropped when the taxname already begins with it
2092  if (NStr::StartsWith(sTaxname, organelle, NStr::eNocase)) {
2093  organelle = kEmptyStr;
2094  }
2095  FOR_EACH_ORGMOD_ON_ORGREF(mod_iter, org) {
2096  const COrgMod & orgmod = **mod_iter;
2097  if (FIELD_EQUALS(orgmod, Subtype, NCBI_ORGMOD(old_name))) {
2098  sOldName = GET_FIELD(orgmod, Subname);
2099  }
2100  }
2101  }
2102  // stop at first source
2103  break;
2104  }
2105  }
2106 
2107  s_RemoveOrgFromEndOfProtein(bioseq, sTaxname);
2108 
2109  // bail if no title
2110  if ((NULL == psTitle) || psTitle->empty()) {
2111  return false;
2112  }
2113 
2114  // put title into a reference,
2115  // just because it's more convenient than a pointer
2116  string & sTitle = *psTitle;
2117  // remember original so we can see if we changed it
2118  const string sOriginalTitle = sTitle;
2119 
2120  // search for partial, must be just before bracketed organism
2121  SIZE_TYPE partialPos = NStr::Find(sTitle, ", partial [");
2122  if (partialPos == NPOS) {
2123  partialPos = NStr::Find(sTitle, ", partial (");
2124  }
2125 
2126  // find oldname or taxname in brackets at end of protein title
      // penult receives the organelle position reported by
      // s_TitleEndsInOrganism (NPOS if none)
2127  SIZE_TYPE penult = NPOS;
2128  SIZE_TYPE suffixPos = NPOS; // will point to " [${organism name}]" at end
2129  if (!sOldName.empty() && !sTaxname.empty()) {
2130  suffixPos = s_TitleEndsInOrganism(sTitle, sOldName, &penult);
2131  }
2132  if (suffixPos == NPOS && !sTaxname.empty()) {
2133  suffixPos = s_TitleEndsInOrganism(sTitle, sTaxname, &penult);
2134  if (suffixPos != NPOS) {
      // the three empty branches are cases where the organelle text needs
      // changing, so processing continues below
2135  if (NStr::IsBlank(organelle) && penult != NPOS) {
2136  } else if (!NStr::IsBlank(organelle) && penult == NPOS) {
2137  } else if (penult != NPOS && sTitle.substr(penult) == organelle) {
2138  } else {
2139  // bail if no need to change partial text or [organism name]
2140  if (bPartial && partialPos != NPOS) {
2141  return false;
2142  } else if (!bPartial && partialPos == NPOS){
2143  return false;
2144  }
2145  }
2146  }
2147  }
2148  // do not change unless [genus species] was at the end
2149  if (suffixPos == NPOS) {
2150  return false;
2151  }
2152 
2153  // truncate bracketed info from end of title, will replace with current taxname
2154  sTitle.resize(suffixPos);
2155  if (penult != NPOS) {
2156  sTitle.resize(penult);
2157  }
2158 
2159  // if ", partial [" was indeed just before the [genus species], it will now be ", partial"
2160  // Note: 9 is length of ", partial"
2161  if (!bPartial &&
2162  partialPos != string::npos &&
2163  (partialPos == (sTitle.length() - 9)))
2164  {
2165  sTitle.resize(partialPos);
2166  }
2168 
2169  //
      // rebuild the tail: ", partial", then organelle, then taxname
2170  if (bPartial && partialPos == NPOS) {
2171  sTitle += ", partial";
2172  }
2173  if (!NStr::IsBlank(organelle)) {
2174  sTitle += " (" + string(organelle) + ")";
2175  }
2176  if (!sTaxname.empty()) {
2177  sTitle += " [" + sTaxname + "]";
2178  }
2179 
2180  if (sTitle != sOriginalTitle) {
2181  return true;
2182  } else {
2183  return false;
2184  }
2185 }
2186 
2188 {
      // For a pseudo coding region that has a product: moves the product's
      // protein name (or, failing that, its description) into the CDS
      // comment, removes the product Bioseq and resets the product on the
      // CDS.  Returns false, changing nothing, if `cds` is not a pseudo
      // coding region with a product.
2189  if (!IsPseudo(cds, scope) ||
2190  !cds.IsSetData() || !cds.GetData().IsCdregion() ||
2191  !cds.IsSetProduct()) {
2192  return false;
2193  }
2194  CBioseq_Handle pseq = scope.GetBioseqHandle(cds.GetProduct());
2195  if (pseq) {
2197  if (prot) {
      // prefer the first protein name, otherwise the description
2198  string label;
2199  if (prot->GetData().GetProt().IsSetName() &&
2200  !prot->GetData().GetProt().GetName().empty()) {
2201  label = prot->GetData().GetProt().GetName().front();
2202  } else if (prot->GetData().GetProt().IsSetDesc()) {
2203  label = prot->GetData().GetProt().GetDesc();
2204  }
2205  if (!NStr::IsBlank(label)) {
      // append to an existing comment using "; " as separator
2206  if (cds.IsSetComment() && !NStr::IsBlank(cds.GetComment())) {
2207  cds.SetComment(cds.GetComment() + "; " + label);
2208  } else {
2209  cds.SetComment(label);
2210  }
2211  }
2212  }
2213  CBioseq_EditHandle pseq_e(pseq);
2214  pseq_e.Remove();
2215  }
      // the product is reset even if it did not resolve to a sequence
2216  cds.ResetProduct();
2217  return true;
2218 }
2219 
2220 
2222 {
      // Cleanup pass over `entry`: fixes up coding regions (frame, partials,
      // products, protein names, matching mRNAs), then gene partials,
      // descriptor order and genetic codes, and finally runs the extended
      // cleanup.  Returns true if any of the tracked steps changed data.
2223  bool any_changes = false;
2224 
2226  for (CFeat_CI cds_it(entry, sel); cds_it; ++cds_it) {
      // all CDS edits go to a copy that replaces the original at the end
2227  bool change_this_cds = false;
2228  CRef<CSeq_feat> new_cds(new CSeq_feat());
2229  new_cds->Assign(*(cds_it->GetSeq_feat()));
2230  if (IsPseudo(*(cds_it->GetSeq_feat()), entry.GetScope())) {
2231  change_this_cds = RemovePseudoProduct(*new_cds, entry.GetScope());
2232  } else {
2233  change_this_cds |= SetBestFrame(*new_cds, entry.GetScope());
2234 
2235  change_this_cds |= SetCDSPartialsByFrameAndTranslation(*new_cds, entry.GetScope());
2236 
2237  // retranslate
2238  if (new_cds->IsSetProduct() && entry.GetScope().GetBioseqHandle(new_cds->GetProduct())) {
2239  any_changes |= feature::RetranslateCDS(*new_cds, entry.GetScope());
2240  } else {
2241  // need to set product if not set
2242  if (!new_cds->IsSetProduct() && !IsPseudo(*new_cds, entry.GetScope())) {
2243  CRef<CSeq_id> new_id = GetNewProteinId(entry, entry.GetScope().GetBioseqHandle(new_cds->GetLocation()));
2244  if (new_id) {
2245  new_cds->SetProduct().SetWhole().Assign(*new_id);
2246  change_this_cds = true;
2247  }
2248  }
2249  if (new_cds->IsSetProduct()) {
2250  CRef<CSeq_entry> prot = AddProtein(*new_cds, entry.GetScope());
2251  if (prot) {
2252  any_changes = true;
2253  }
2254  }
2255  any_changes |= feature::AdjustForCDSPartials(*new_cds, entry);
2256  }
2257  //prefer ncbieaa
      // NOTE(review): `p` is used without checking that the handle is
      // valid -- confirm the product always resolves at this point.
2258  if (new_cds->IsSetProduct()) {
2259  CBioseq_Handle p = entry.GetScope().GetBioseqHandle(new_cds->GetProduct());
2260  if (p.GetInst().IsSetSeq_data() && p.GetInst().GetSeq_data().IsIupacaa()) {
2261  CBioseq_EditHandle peh(p);
2262  string current = p.GetInst().GetSeq_data().GetIupacaa().Get();
2263  CRef<CSeq_inst> new_inst(new CSeq_inst());
2264  new_inst->Assign(p.GetInst());
2265  new_inst->SetSeq_data().SetNcbieaa().Set(current);
2266  peh.SetInst(*new_inst);
2267  any_changes = true;
2268  }
2269  }
2270 
      // unnamed proteins get the placeholder name
2271  string current_name = GetProteinName(*new_cds, entry.GetScope());
2272  if (NStr::IsBlank(current_name)) {
2273  SetProteinName(*new_cds, "hypothetical protein", false, entry.GetScope());
2274  current_name = "hypothetical protein";
2275  change_this_cds = true;
2276  }
2277 
2278  CConstRef<CSeq_feat> mrna = sequence::GetmRNAforCDS(*(cds_it->GetSeq_feat()), entry.GetScope());
2279  if (mrna) {
2280  bool change_mrna = false;
2281  CRef<CSeq_feat> new_mrna(new CSeq_feat());
2282  new_mrna->Assign(*mrna);
2283  // Make mRNA name match coding region protein
      // a named mRNA is not overwritten with the placeholder name
2284  string mrna_name = new_mrna->GetData().GetRna().GetRnaProductName();
2285  if (NStr::IsBlank(mrna_name)
2286  || (!NStr::Equal(current_name, "hypothetical protein") &&
2287  !NStr::Equal(current_name, mrna_name))) {
2288  string remainder;
2289  new_mrna->SetData().SetRna().SetRnaProductName(current_name, remainder);
2290  change_mrna = true;
2291  }
2292  // Adjust mRNA partials to match coding region
2293  change_mrna |= feature::CopyFeaturePartials(*new_mrna, *new_cds);
2294  if (change_mrna) {
2295  CSeq_feat_Handle fh = entry.GetScope().GetSeq_featHandle(*mrna);
2296  CSeq_feat_EditHandle feh(fh);
2297  feh.Replace(*new_mrna);
2298  any_changes = true;
2299  }
2300  }
2301  }
2302 
2303  if (change_this_cds) {
2304  CSeq_feat_EditHandle cds_h(*cds_it);
2305 
2306  cds_h.Replace(*new_cds);
2307  any_changes = true;
2308 
2309  //also need to redo protein title
2310  }
2311 
2312  }
2313 
      // second pass: gene partials follow the features they contain
2314  for (CFeat_CI gene_it(entry, SAnnotSelector(CSeqFeatData::e_Gene)); gene_it; ++gene_it) {
2315  bool change_this_gene;
2316  CRef<CSeq_feat> new_gene(new CSeq_feat());
2317  new_gene->Assign(*(gene_it->GetSeq_feat()));
2318 
2319  change_this_gene = SetGenePartialByLongestContainedFeature(*new_gene, entry.GetScope());
2320 
2321  if (change_this_gene) {
2322  CSeq_feat_EditHandle gene_h(*gene_it);
2323  gene_h.Replace(*new_gene);
2324  any_changes = true;
2325  }
2326  }
2327 
      // the result of NormalizeDescriptorOrder is not folded into
      // any_changes
2328  NormalizeDescriptorOrder(entry);
2329 
2330  for (CBioseq_CI bi(entry, CSeq_inst::eMol_na); bi; ++bi) {
2331  any_changes |= SetGeneticCodes(*bi);
2332  }
2333 
      // changes made by the extended cleanup are recorded in `changes`
      // and are not reflected in the return value
2335  CNewCleanup_imp exclean(changes, 0);
2336  exclean.ExtendedCleanup(entry);
2337 
2338  return any_changes;
2339 }
2340 
2341 // maps the type of seqdesc to the order it should be in
2342 // (lowest to highest)
      // The second value of each pair is the sort rank: Title (1) comes
      // first, then Source (2), Molinfo (3), ... up to Update_date (25).
2344 static const TSeqdescOrderElem sc_seqdesc_order_map[] = {
2345  // Note that ordering must match ordering
2346  // in CSeqdesc::E_Choice
2347  { CSeqdesc::e_Mol_type, 13 },
2348  { CSeqdesc::e_Modif, 14 },
2349  { CSeqdesc::e_Method, 15 },
2350  { CSeqdesc::e_Name, 7 },
2351  { CSeqdesc::e_Title, 1 },
2352  { CSeqdesc::e_Org, 16 },
2353  { CSeqdesc::e_Comment, 6 },
2354  { CSeqdesc::e_Num, 11 },
2355  { CSeqdesc::e_Maploc, 9 },
2356  { CSeqdesc::e_Pir, 18 },
2357  { CSeqdesc::e_Genbank, 22 },
2358  { CSeqdesc::e_Pub, 5 },
2359  { CSeqdesc::e_Region, 10 },
2360  { CSeqdesc::e_User, 8 },
2361  { CSeqdesc::e_Sp, 17 },
2362  { CSeqdesc::e_Dbxref, 12 },
2363  { CSeqdesc::e_Embl, 21 },
2364  { CSeqdesc::e_Create_date, 24 },
2365  { CSeqdesc::e_Update_date, 25 },
2366  { CSeqdesc::e_Prf, 19 },
2367  { CSeqdesc::e_Pdb, 20 },
2368  { CSeqdesc::e_Het, 4 },
2369  { CSeqdesc::e_Source, 2 },
2370  { CSeqdesc::e_Molinfo, 3 },
2371  { CSeqdesc::e_Modelev, 23 }
2372 };
      // lookup map built over the array above
2374 DEFINE_STATIC_ARRAY_MAP(TSeqdescOrderMap, sc_SeqdescOrderMap, sc_seqdesc_order_map);
2375 
      // Returns the sort rank of a descriptor according to
      // sc_SeqdescOrderMap; descriptor types missing from the map rank
      // after all known ones.
2376 static
2378  // ordering assigned to unknown
2379  const int unknown_seqdesc = (1 + sc_SeqdescOrderMap.size());
2380 
2381  TSeqdescOrderMap::const_iterator find_iter = sc_SeqdescOrderMap.find(desc->Which());
2382  if (find_iter == sc_SeqdescOrderMap.end()) {
2383  return unknown_seqdesc;
2384  }
2385 
2386  return find_iter->second;
2387 }
2388 
2389 static
2390 bool s_SeqDescLessThan(const CRef<CSeqdesc> &desc1, const CRef<CSeqdesc> &desc2)
2391 {
2392  return (s_SeqDescToOrdering(desc1) < s_SeqDescToOrdering(desc2));
2393 }
2394 
2396 {
2397  bool rval = false;
2398  if (!seq_mac_is_sorted(descr.Set().begin(), descr.Set().end(), s_SeqDescLessThan)) {
2399  descr.Set().sort(s_SeqDescLessThan);
2400  rval = true;
2401  }
2402  return rval;
2403 }
2404 
2406 {
2407  bool rval = false;
2408 
2409  CSeq_entry_CI ci(seh);
2410  while (ci) {
2411  CSeq_entry_EditHandle edit(*ci);
2412  if (edit.IsSetDescr()) {
2413  rval |= NormalizeDescriptorOrder(edit.SetDescr());
2414  }
2415  ++ci;
2416  }
2417 
2418  return rval;
2419 }
2420 
2421 
2423 {
2424  bool removed = false;
2425  if (seq.IsSetDescr()) {
2426  CConstRef<CSeqdesc> last_title(NULL);
2428  if ((*d)->IsTitle()) {
2429  if (last_title) {
2430  seq.RemoveSeqdesc(*last_title);
2431  removed = true;
2432  }
2433  last_title.Reset(d->GetPointer());
2434  }
2435  }
2436  }
2437  return removed;
2438 }
2439 
2440 
2442 {
2443  bool removed = false;
2444  if (set.IsSetDescr()) {
2445  CConstRef<CSeqdesc> last_title(NULL);
2447  if ((*d)->IsTitle()) {
2448  if (last_title) {
2449  set.RemoveSeqdesc(*last_title);
2450  removed = true;
2451  }
2452  last_title.Reset(d->GetPointer());
2453  }
2454  }
2455  }
2456  return removed;
2457 }
2458 
2459 
2461 {
2462  if (seh.IsSet() && seh.GetSet().IsSetClass() &&
2464  return false;
2465  }
2466  CSeq_entry_EditHandle eh(seh);
2468  return true;
2469 }
2470 
2471 
2472 void s_GetAuthorsString(string *out_authors, const CAuth_list& auth_list)
2473 {
2474  string & auth_str = *out_authors;
2475  auth_str.clear();
2476 
2477  if (!auth_list.IsSetNames()) {
2478  return;
2479  }
2480 
2481  vector<string> name_list;
2482 
2483  if (auth_list.GetNames().IsStd()) {
2484  ITERATE(CAuth_list::TNames::TStd, auth_it, auth_list.GetNames().GetStd()) {
2485  if ((*auth_it)->IsSetName()) {
2486  string label = "";
2487  (*auth_it)->GetName().GetLabel(&label);
2488  name_list.push_back(label);
2489  }
2490  }
2491  } else if (auth_list.GetNames().IsMl()) {
2492  copy(BEGIN_COMMA_END(auth_list.GetNames().GetMl()),
2493  back_inserter(name_list));
2494  } else if (auth_list.GetNames().IsStr()) {
2495  copy(BEGIN_COMMA_END(auth_list.GetNames().GetStr()),
2496  back_inserter(name_list));
2497  }
2498 
2499  if (name_list.size() == 0) {
2500  return;
2501  } else if (name_list.size() == 1) {
2502  auth_str = name_list.back();
2503  return;
2504  }
2505 
2506  // join most of them by commas, but the last one gets an "and"
2507  string last_author;
2508  last_author.swap(name_list.back());
2509  name_list.pop_back();
2510  // swap is faster than assignment
2511  NStr::Join(name_list, ", ").swap(auth_str);
2512  auth_str += "and ";
2513  auth_str += last_author;
2514 
2515  return;
2516 }
2517 
2518 
2520  string *out_authors_string, const CPubdesc& pd)
2521 {
2522  string & authors_string = *out_authors_string;
2523  authors_string.clear();
2524 
2525  FOR_EACH_PUB_ON_PUBDESC(pub, pd) {
2526  if ((*pub)->IsSetAuthors()) {
2527  s_GetAuthorsString(&authors_string, (*pub)->GetAuthors());
2528  break;
2529  }
2530  }
2531 }
2532 
2533 
2535 (const CPubdesc& pd,
2536 vector<int>& pmids, vector<int>& muids, vector<int>& serials,
2537 vector<string>& published_labels,
2538 vector<string>& unpublished_labels)
2539 {
// Scans the pubs of pd and appends to the caller's vectors: PubMed ids,
// MEDLINE ids, Cit-gen serial numbers, and at most one text label. The
// vectors are only appended to, never cleared. Nothing is appended when
// pd has no pubs set.
2540  string label = "";
// Both flags are sticky: once set for one pub they stay set for all later
// pubs in the loop.
2541  bool is_published = false;
2542  bool need_label = false;
2543 
2544  if (!pd.IsSetPub()) {
2545  return;
2546  }
2547  ITERATE(CPubdesc::TPub::Tdata, it, pd.GetPub().Get()) {
2548  if ((*it)->IsPmid()) {
2549  pmids.push_back((*it)->GetPmid());
2550  is_published = true;
2551  } else if ((*it)->IsMuid()) {
2552  muids.push_back((*it)->GetMuid());
2553  is_published = true;
2554  } else if ((*it)->IsGen()) {
// A Cit-gen whose cit text starts with "BackBone id_pub" (case-insensitive)
// always requests a label.
2555  if ((*it)->GetGen().IsSetCit()
2556  && NStr::StartsWith((*it)->GetGen().GetCit(), "BackBone id_pub", NStr::eNocase)) {
2557  need_label = true;
2558  }
// With a serial number, a label is requested only if cit, journal or date
// is also present; without one, a label is always requested.
2559  if ((*it)->GetGen().IsSetSerial_number()) {
2560  serials.push_back((*it)->GetGen().GetSerial_number());
2561  if ((*it)->GetGen().IsSetCit()
2562  || (*it)->GetGen().IsSetJournal()
2563  || (*it)->GetGen().IsSetDate()) {
2564  need_label = true;
2565  }
2566  } else {
2567  need_label = true;
2568  }
2569  } else if ((*it)->IsArticle() && (*it)->GetArticle().IsSetIds()) {
// Any article with an id set counts as published, whatever ids it holds.
2570  is_published = true;
2571  ITERATE(CArticleIdSet::Tdata, id, (*it)->GetArticle().GetIds().Get()) {
2572  if ((*id)->IsPubmed()) {
2573  pmids.push_back((*id)->GetPubmed());
2574  is_published = true;
2575  } else if ((*id)->IsMedline()) {
2576  muids.push_back((*id)->GetMedline());
2577  }
2578  }
2579  need_label = true;
2580  } else {
2581  need_label = true;
2582  }
// The label is built only while it is still blank, so it comes from the
// first pub reached with need_label set.
2583  if (need_label && NStr::IsBlank(label)) {
2584  // create unique label
2585  (*it)->GetLabel(&label, CPub::eContent, true);
2586  string auth_str;
2587  s_GetAuthorsString(&auth_str, pd);
2588  label += "; ";
2589  label += auth_str;
2590  }
2591  }
// The single label is filed as published or unpublished according to the
// final value of is_published.
2592  if (!NStr::IsBlank(label)) {
2593  if (is_published) {
2594  published_labels.push_back(label);
2595  } else {
2596  unpublished_labels.push_back(label);
2597  }
2598  }
2599 }
2600 
2601 
2602 vector<CConstRef<CPub> > CCleanup::GetCitationList(CBioseq_Handle bsh)
2603 {
2604  vector<CConstRef<CPub> > pub_list;
2605 
2606  // first get descriptor pubs
2607  CSeqdesc_CI di(bsh, CSeqdesc::e_Pub);
2608  while (di) {
2609  vector<int> pmids;
2610  vector<int> muids;
2611  vector<int> serials;
2612  vector<string> published_labels;
2613  vector<string> unpublished_labels;
2614  GetPubdescLabels(di->GetPub(), pmids, muids, serials, published_labels, unpublished_labels);
2615  if (pmids.size() > 0) {
2616  CRef<CPub> pub(new CPub());
2617  pub->SetPmid().Set(pmids[0]);
2618  pub_list.push_back(pub);
2619  } else if (muids.size() > 0) {
2620  CRef<CPub> pub(new CPub());
2621  pub->SetMuid(muids[0]);
2622  pub_list.push_back(pub);
2623  } else if (serials.size() > 0) {
2624  CRef<CPub> pub(new CPub());
2625  pub->SetGen().SetSerial_number(serials[0]);
2626  pub_list.push_back(pub);
2627  } else if (published_labels.size() > 0) {
2628  CRef<CPub> pub(new CPub());
2629  pub->SetGen().SetCit(published_labels[0]);
2630  pub_list.push_back(pub);
2631  } else if (unpublished_labels.size() > 0) {
2632  CRef<CPub> pub(new CPub());
2633  pub->SetGen().SetCit(unpublished_labels[0]);
2634  pub_list.push_back(pub);
2635  }
2636 
2637  ++di;
2638  }
2639  // now get pub features
2641  while (fi) {
2642  vector<int> pmids;
2643  vector<int> muids;
2644  vector<int> serials;
2645  vector<string> published_labels;
2646  vector<string> unpublished_labels;
2647  GetPubdescLabels(fi->GetData().GetPub(), pmids, muids, serials, published_labels, unpublished_labels);
2648  if (pmids.size() > 0) {
2649  CRef<CPub> pub(new CPub());
2650  pub->SetPmid().Set(pmids[0]);
2651  pub_list.push_back(pub);
2652  } else if (muids.size() > 0) {
2653  CRef<CPub> pub(new CPub());
2654  pub->SetMuid(muids[0]);
2655  pub_list.push_back(pub);
2656  } else if (serials.size() > 0) {
2657  CRef<CPub> pub(new CPub());
2658  pub->SetGen().SetSerial_number(serials[0]);
2659  pub_list.push_back(pub);
2660  } else if (published_labels.size() > 0) {
2661  CRef<CPub> pub(new CPub());
2662  pub->SetGen().SetCit(published_labels[0]);
2663  pub_list.push_back(pub);
2664  } else if (unpublished_labels.size() > 0) {
2665  CRef<CPub> pub(new CPub());
2666  pub->SetGen().SetCit(unpublished_labels[0]);
2667  pub_list.push_back(pub);
2668  }
2669 
2670  ++fi;
2671  }
2672  return pub_list;
2673 }
2674 
2675 
2677 {
2678  bool any_change = false;
2679  CSeq_descr::Tdata::iterator it1 = descr.Set().begin();
2680  while (it1 != descr.Set().end()) {
2681  if ((*it1)->IsPub()) {
2682  CSeq_descr::Tdata::iterator it2 = it1;
2683  ++it2;
2684  while (it2 != descr.Set().end()) {
2685  if ((*it2)->IsPub() && (*it1)->GetPub().Equals((*it2)->GetPub())) {
2686  it2 = descr.Set().erase(it2);
2687  any_change = true;
2688  } else {
2689  ++it2;
2690  }
2691  }
2692  }
2693  ++it1;
2694  }
2695  return any_change;
2696 }
2697 
2698 
2699 bool s_FirstPubMatchesSecond(const CPubdesc& pd1, const CPubdesc& pd2)
2700 {
2701  if (pd1.Equals(pd2)) {
2702  return true;
2703  } else if (pd1.IsSetPub() && pd2.IsSetPub() && pd1.GetPub().Get().size() == 1) {
2704  ITERATE(CPubdesc::TPub::Tdata, it, pd2.GetPub().Get()) {
2705  if (pd1.GetPub().Get().front()->Equals(**it)) {
2706  return true;
2707  }
2708  }
2709  }
2710  return false;
2711 }
2712 
2713 
2714 bool CCleanup::PubAlreadyInSet(const CPubdesc& pd, const CSeq_descr& descr)
2715 {
2716  ITERATE(CSeq_descr::Tdata, d, descr.Get()) {
2717  if ((*d)->IsPub() && s_FirstPubMatchesSecond(pd, (*d)->GetPub())) {
2718  return true;
2719  }
2720  }
2721  return false;
2722 }
2723 
2724 
2726 {
2727  bool is_embl_or_ddbj = false;
2728  ITERATE(CBioseq::TId, id, b.GetId()) {
2729  if ((*id)->IsEmbl() || (*id)->IsDdbj()) {
2730  is_embl_or_ddbj = true;
2731  break;
2732  }
2733  }
2734  return !is_embl_or_ddbj;
2735 }
2736 
2737 
2739 {
2740  if (pd.IsSetNum() || pd.IsSetName() || pd.IsSetFig() || pd.IsSetComment()) {
2741  return false;
2742  } else {
2743  return true;
2744  }
2745 }
2746 
2747 
2749 {
2750  // add descriptor to nuc-prot parent or sequence itself
2753  // add to sequence
2754  CBioseq_EditHandle eh(b);
2755  eh.AddSeqdesc(*d);
2758  } else if (parent && parent.IsSetClass() &&
2759  parent.GetClass() == CBioseq_set::eClass_nuc_prot &&
2760  parent.IsSetDescr() && PubAlreadyInSet(d->GetPub(), parent.GetDescr())) {
2761  // don't add descriptor, just delete feature
2762  } else if (OkToPromoteNpPub((d)->GetPub()) &&
2763  parent && parent.IsSetClass() &&
2764  parent.GetClass() == CBioseq_set::eClass_nuc_prot) {
2765  CBioseq_set_EditHandle eh(parent);
2766  eh.AddSeqdesc(*d);
2769  } else {
2770  CBioseq_EditHandle eh(b);
2771  eh.AddSeqdesc(*d);
2774  }
2775  if (remove_feat) {
2776  // remove feature
2777  CSeq_feat_EditHandle feh(feat);
2778  feh.Remove();
2779  }
2780 }
2781 
2782 
2784 {
2785  bool any_change = false;
2786  for (CBioseq_CI b(seh); b; ++b) {
2787  for (CFeat_CI p(*b, CSeqFeatData::e_Pub); p; ++p) {
2788  if (p->GetLocation().IsInt() &&
2789  p->GetLocation().GetStart(eExtreme_Biological) == 0 &&
2790  p->GetLocation().GetStop(eExtreme_Biological) == b->GetBioseqLength() - 1) {
2791  CRef<CSeqdesc> d(new CSeqdesc());
2792  d->SetPub().Assign(p->GetData().GetPub());
2793  if (p->IsSetComment()) {
2794  if (d->GetPub().IsSetComment() && !NStr::IsBlank(d->GetPub().GetComment())) {
2795  d->SetPub().SetComment(d->GetPub().GetComment() + "; " + p->GetComment());
2796  } else {
2797  d->SetPub().SetComment();
2798  }
2799  }
2800  MoveOneFeatToPubdesc(*p, d, *b);
2801  any_change = true;
2802  }
2803  }
2804  }
2805  return any_change;
2806 }
2807 
2808 
2809 bool IsSiteRef(const CSeq_feat& sf)
2810 {
2811  if (sf.GetData().IsImp() &&
2812  sf.GetData().GetImp().IsSetKey() &&
2813  NStr::Equal(sf.GetData().GetImp().GetKey(), "Site-ref")) {
2814  return true;
2815  } else {
2816  return false;
2817  }
2818 }
2819 
2820 
2821 bool CCleanup::IsMinPub(const CPubdesc& pd, bool is_refseq_prot)
2822 {
2823  if (!pd.IsSetPub()) {
2824  return true;
2825  }
2826  bool found_non_minimal = false;
2827  ITERATE(CPubdesc::TPub::Tdata, it, pd.GetPub().Get()) {
2828  if ((*it)->IsMuid() || (*it)->IsPmid()) {
2829  if (is_refseq_prot) {
2830  found_non_minimal = true;
2831  break;
2832  }
2833  } else if ((*it)->IsGen()) {
2834  const CCit_gen& gen = (*it)->GetGen();
2835  if (gen.IsSetCit() && !gen.IsSetJournal() &&
2836  !gen.IsSetAuthors() && !gen.IsSetVolume() &&
2837  !gen.IsSetPages()) {
2838  //minimalish, keep looking
2839  } else {
2840  found_non_minimal = true;
2841  }
2842  } else {
2843  found_non_minimal = true;
2844  break;
2845  }
2846  }
2847 
2848  return !found_non_minimal;
2849 }
2850 
2851 
2853 {
2854  bool found_site_ref = false;
2856  while (f && !found_site_ref) {
2857  if (IsSiteRef(*(f->GetSeq_feat()))) {
2858  found_site_ref = true;
2859  }
2860  ++f;
2861  }
2862  if (!found_site_ref) {
2863  return false;
2864  }
2865 
2866  bool any_change = false;
2867  for (CBioseq_CI b(seh); b; ++b) {
2868  bool is_refseq_prot = false;
2869  if (b->IsAa()) {
2870  ITERATE(CBioseq::TId, id_it, b->GetCompleteBioseq()->GetId()) {
2871  if ((*id_it)->IsOther()) {
2872  is_refseq_prot = true;
2873  break;
2874  }
2875  }
2876  }
2877 
2878  for (CFeat_CI p(*b); p; ++p) {
2879  if (!p->IsSetCit() || p->GetCit().Which() != CPub_set::e_Pub) {
2880  continue;
2881  }
2882 
2883  bool is_site_ref = IsSiteRef(*(p->GetSeq_feat()));
2884  ITERATE(CSeq_feat::TCit::TPub, c, p->GetCit().GetPub()) {
2885  CRef<CSeqdesc> d(new CSeqdesc());
2886  if ((*c)->IsEquiv()) {
2887  ITERATE(CPub_equiv::Tdata, t, (*c)->GetEquiv().Get()) {
2888  CRef<CPub> pub_copy(new CPub());
2889  pub_copy->Assign(**t);
2890  d->SetPub().SetPub().Set().push_back(pub_copy);
2891  }
2892 
2893  } else {
2894  CRef<CPub> pub_copy(new CPub());
2895  pub_copy->Assign(**c);
2896  d->SetPub().SetPub().Set().push_back(pub_copy);
2897  }
2898  if (is_site_ref) {
2900  } else {
2902  }
2904  CNewCleanup_imp pubclean(changes, 0);
2905  pubclean.BasicCleanup(d->SetPub(), ShouldStripPubSerial(*(b->GetCompleteBioseq())));
2906  if (!IsMinPub(d->SetPub(), is_refseq_prot)) {
2907  MoveOneFeatToPubdesc(*p, d, *b, false);
2908  }
2909  }
2910  if (is_site_ref) {
2911  CSeq_feat_EditHandle feh(*p);
2912  feh.Remove();
2913  }
2914  any_change = true;
2915  }
2916  }
2917  return any_change;
2918 }
2919 
2920 
2922 {
2923  if (src1.IsSetOrg() && src1.GetOrg().IsSetTaxname() &&
2924  src2.IsSetOrg() && src2.GetOrg().IsSetTaxname() &&
2925  NStr::Equal(src1.GetOrg().GetTaxname(), src2.GetOrg().GetTaxname())) {
2926  return true;
2927  } else {
2928  return false;
2929  }
2930 }
2931 
2932 
2934 {
2935  bool any_change = false;
2936  // genome
2937  if ((!src1.IsSetGenome() || src1.GetGenome() == CBioSource::eGenome_unknown) &&
2939  src1.SetGenome(add.GetGenome());
2940  any_change = true;
2941  }
2942  // origin
2943  if ((!src1.IsSetOrigin() || src1.GetOrigin() == CBioSource::eOrigin_unknown) &&
2945  src1.SetOrigin(add.GetOrigin());
2946  any_change = true;
2947  }
2948  // focus
2949  if (!src1.IsSetIs_focus() && add.IsSetIs_focus()) {
2950  src1.SetIs_focus();
2951  any_change = true;
2952  }
2953 
2954  // merge subtypes
2955  if (add.IsSetSubtype()) {
2957  CRef<CSubSource> a(new CSubSource());
2958  a->Assign(**it);
2959  src1.SetSubtype().push_back(a);
2960  }
2961  any_change = true;
2962  }
2963 
2964  x_MergeDupOrgRefs(src1.SetOrg(), add.GetOrg());
2965 
2966  return any_change;
2967 }
2968 
2969 
2971 {
2972  bool any_change = false;
2973 
2974  // OrgMods
2975  if (add.IsSetMod()) {
2976  ITERATE(COrgName::TMod, it, add.GetMod()) {
2977  CRef<COrgMod> a(new COrgMod());
2978  a->Assign(**it);
2979  on1.SetMod().push_back(a);
2980  }
2981  any_change = true;
2982  }
2983 
2984  // gcode
2985  if ((!on1.IsSetGcode() || on1.GetGcode() == 0) && add.IsSetGcode() && add.GetGcode() != 0) {
2986  on1.SetGcode(add.GetGcode());
2987  any_change = true;
2988  }
2989 
2990  // mgcode
2991  if ((!on1.IsSetMgcode() || on1.GetMgcode() == 0) && add.IsSetMgcode() && add.GetMgcode() != 0) {
2992  on1.SetMgcode(add.GetMgcode());
2993  any_change = true;
2994  }
2995 
2996  // lineage
2997  if (!on1.IsSetLineage() && add.IsSetLineage()) {
2998  on1.SetLineage(add.GetLineage());
2999  any_change = true;
3000  }
3001 
3002  // div
3003  if (!on1.IsSetDiv() && add.IsSetDiv()) {
3004  on1.SetDiv(add.GetDiv());
3005  any_change = true;
3006  }
3007 
3008  return any_change;
3009 }
3010 
3011 
3012 bool HasMod(const COrg_ref& org, const string& mod)
3013 {
3014  if (!org.IsSetMod()) {
3015  return false;
3016  }
3017  ITERATE(COrg_ref::TMod, it, org.GetMod()) {
3018  if (NStr::Equal(*it, mod)) {
3019  return true;
3020  }
3021  }
3022  return false;
3023 }
3024 
3025 
3027 {
3028  bool any_change = false;
3029  // mods
3030  if (add.IsSetMod()) {
3031  ITERATE(COrg_ref::TMod, it, add.GetMod()) {
3032  if (!HasMod(org1, *it)) {
3033  org1.SetMod().push_back(*it);
3034  any_change = true;
3035  }
3036  }
3037  }
3038 
3039  // dbxrefs
3040  if (add.IsSetDb()) {
3041  ITERATE(COrg_ref::TDb, it, add.GetDb()) {
3042  CRef<CDbtag> a(new CDbtag());
3043  a->Assign(**it);
3044  org1.SetDb().push_back(a);
3045  }
3046  any_change = true;
3047  }
3048 
3049  // synonyms
3050  if (add.IsSetSyn()) {
3051  ITERATE(COrg_ref::TSyn, it, add.GetSyn()) {
3052  org1.SetSyn().push_back(*it);
3053  }
3054  any_change = true;
3055  }
3056 
3057  if (add.IsSetOrgname()) {
3058  any_change |= x_MergeDupOrgNames(org1.SetOrgname(), add.GetOrgname());
3059  }
3060 
3061  return any_change;
3062 }
3063 
3064 
3066 {
3067  bool any_change = false;
3068  CSeq_descr::Tdata::iterator src1 = seq_descr.Set().begin();
3069  while (src1 != seq_descr.Set().end()) {
3070  if ((*src1)->IsSource() && (*src1)->GetSource().IsSetOrg() && (*src1)->GetSource().GetOrg().IsSetTaxname()) {
3071  CSeq_descr::Tdata::iterator src2 = src1;
3072  ++src2;
3073  while (src2 != seq_descr.Set().end()) {
3074  if ((*src2)->IsSource() &&
3075  AreBioSourcesMergeable((*src1)->GetSource(), (*src2)->GetSource())) {
3076  MergeDupBioSources((*src1)->SetSource(), (*src2)->GetSource());
3077 
3079  CNewCleanup_imp srcclean(changes, 0);
3080  srcclean.ExtendedCleanup((*src1)->SetSource());
3081  src2 = seq_descr.Set().erase(src2);
3082  any_change = true;
3083  } else {
3084  ++src2;
3085  }
3086  }
3087  }
3088  ++src1;
3089  }
3090  return any_change;
3091 }
3092 
3093 /// Remove duplicate biosource descriptors
3095 {
3096  bool any_change = false;
3097  vector<CConstRef<CBioSource> > src_list;
3098  CSeq_descr::Tdata::iterator d = descr.Set().begin();
3099  while (d != descr.Set().end()) {
3100  if ((*d)->IsSource()) {
3101  bool found = false;
3102  ITERATE(vector<CConstRef<CBioSource> >, s, src_list) {
3103  if ((*d)->GetSource().Equals(**s)) {
3104  found = true;
3105  break;
3106  }
3107  }
3108  if (found) {
3109  d = descr.Set().erase(d);
3110  any_change = true;
3111  } else {
3112  CConstRef<CBioSource> src(&((*d)->GetSource()));
3113  src_list.push_back(src);
3114  ++d;
3115  }
3116  } else {
3117  ++d;
3118  }
3119  }
3120  return any_change;
3121 }
3122 
3123 
3125 {
3126  if (!f.IsSetData() || !f.GetData().IsBiosrc()) {
3127  return CRef<CBioSource>(NULL);
3128  }
3129  CRef<CBioSource> src(new CBioSource());
3130  src->Assign(f.GetData().GetBiosrc());
3131 
3132  // move comment to subsource note
3133  if (f.IsSetComment()) {
3134  CRef<CSubSource> s(new CSubSource());
3136  s->SetName(f.GetComment());
3137  src->SetSubtype().push_back(s);
3138 
3139  }
3140 
3141  // move dbxrefs on feature to source
3142  if (f.IsSetDbxref()) {
3144  CRef<CDbtag> a(new CDbtag());
3145  a->Assign(**it);
3146  src->SetOrg().SetDb().push_back(a);
3147  }
3148  }
3150  CNewCleanup_imp srcclean(changes, 0);
3151  srcclean.ExtendedCleanup(*src);
3152 
3153  return src;
3154 }
3155 
3156 
3158 {
3159  bool any_change = false;
3160  for (CBioseq_CI b(seh); b; ++b) {
3161  for (CFeat_CI p(*b, CSeqFeatData::e_Biosrc); p; ++p) {
3162  if (p->GetLocation().IsInt() &&
3163  p->GetLocation().GetStart(eExtreme_Biological) == 0 &&
3164  p->GetLocation().GetStop(eExtreme_Biological) == b->GetBioseqLength() - 1) {
3165  CRef<CSeqdesc> d(new CSeqdesc());
3166  d->SetSource().Assign(*(BioSrcFromFeat(*(p->GetSeq_feat()))));
3167 
3168  // add descriptor to nuc-prot parent or sequence itself
3169  CBioseq_set_Handle parent = b->GetParentBioseq_set();
3170  if (parent && parent.IsSetClass() &&
3171  parent.GetClass() == CBioseq_set::eClass_nuc_prot) {
3172  CBioseq_set_EditHandle eh(parent);
3173  eh.AddSeqdesc(*d);
3177  } else {
3178  CBioseq_EditHandle eh(*b);
3179  eh.AddSeqdesc(*d);
3183  }
3184 
3185  // remove feature
3186  CSeq_feat_EditHandle feh(*p);
3187  feh.Remove();
3188 
3189  any_change = true;
3190  }
3191  }
3192  }
3193  return any_change;
3194 }
3195 
3196 
3197 
3199 {
3200  CFeat_CI fi(seh);
3201  size_t num_gene_locus = 0;
3202  size_t num_gene_locus_tag = 0;
3203  size_t num_gene_xref_locus = 0;
3204  size_t num_gene_xref_locus_tag = 0;
3205 
3206  while (fi) {
3207  if (fi->GetData().IsGene()) {
3208  if (fi->GetData().GetGene().IsSetLocus()) {
3209  num_gene_locus++;
3210  }
3211  if (fi->GetData().GetGene().IsSetLocus_tag()) {
3212  num_gene_locus_tag++;
3213  }
3214  } else if (fi->IsSetXref()) {
3215  const CGene_ref* g = fi->GetGeneXref();
3216  if (g) {
3217  if (g->IsSetLocus()) {
3218  num_gene_xref_locus++;
3219  }
3220  if (g->IsSetLocus_tag()) {
3221  num_gene_xref_locus_tag++;
3222  }
3223  }
3224  }
3225  if (num_gene_locus > 0) {
3226  if (num_gene_locus_tag > 0) {
3227  return false;
3228  }
3229  if (num_gene_xref_locus > 0) {
3230  return false;
3231  }
3232  }
3233  if (num_gene_locus_tag > 0) {
3234  if (num_gene_locus > 0) {
3235  return false;
3236  }
3237  if (num_gene_xref_locus_tag > 0) {
3238  return false;
3239  }
3240  }
3241  ++fi;
3242  }
3243 
3244  bool any_change = false;
3245  if (num_gene_locus == 0 && num_gene_locus_tag > 0) {
3246  if (num_gene_xref_locus > 0 && num_gene_xref_locus_tag == 0) {
3247  fi.Rewind();
3248  while (fi) {
3249  if (!fi->GetData().IsGene() && fi->GetGeneXref() != NULL) {
3250  bool this_change = false;
3251  CRef<CSeq_feat> new_f(new CSeq_feat());
3252  new_f->Assign(*(fi->GetSeq_feat()));
3253  NON_CONST_ITERATE(CSeq_feat::TXref, it, new_f->SetXref()) {
3254  if ((*it)->IsSetData() && (*it)->GetData().IsGene()
3255  && (*it)->GetData().GetGene().IsSetLocus()) {
3256  (*it)->SetData().SetGene().SetLocus_tag((*it)->GetData().GetGene().GetLocus());
3257  (*it)->SetData().SetGene().ResetLocus();
3258  this_change = true;
3259  }
3260  }
3261  if (this_change) {
3262  CSeq_feat_EditHandle eh(*fi);
3263  eh.Replace(*new_f);
3264  }
3265  }
3266  ++fi;
3267  }
3268  }
3269  } else if (num_gene_locus > 0 && num_gene_locus_tag == 0) {
3270  if (num_gene_xref_locus == 0 && num_gene_xref_locus_tag > 0) {
3271  fi.Rewind();
3272  while (fi) {
3273  if (!fi->GetData().IsGene() && fi->GetGeneXref() != NULL) {
3274  bool this_change = false;
3275  CRef<CSeq_feat> new_f(new CSeq_feat());
3276  new_f->Assign(*(fi->GetSeq_feat()));
3277  NON_CONST_ITERATE(CSeq_feat::TXref, it, new_f->SetXref()) {
3278  if ((*it)->IsSetData() && (*it)->GetData().IsGene()
3279  && (*it)->GetData().GetGene().IsSetLocus_tag()) {
3280  (*it)->SetData().SetGene().SetLocus((*it)->GetData().GetGene().GetLocus_tag());
3281  (*it)->SetData().SetGene().ResetLocus_tag();
3282  this_change = true;
3283  }
3284  }
3285  if (this_change) {
3286  CSeq_feat_EditHandle eh(*fi);
3287  eh.Replace(*new_f);
3288  any_change = true;
3289  }
3290  }
3291  ++fi;
3292  }
3293  }
3294  }
3295  return any_change;
3296 }
3297 
3298 
3300 {
3301  bool strip_serial = true;
3302  ITERATE(CBioseq::TId, id, bs.GetId()) {
3303  const CSeq_id& sid = **id;
3304  switch (sid.Which()) {
3305  case NCBI_SEQID(Genbank):
3306  case NCBI_SEQID(Tpg):
3307  {
3308  const CTextseq_id& tsid = *GET_FIELD(sid, Textseq_Id);
3309  if (FIELD_IS_SET(tsid, Accession)) {
3310  const string& acc = GET_FIELD(tsid, Accession);
3311  if (acc.length() == 6) {
3312  strip_serial = false;
3313  }
3314  }
3315  }
3316  break;
3317  case NCBI_SEQID(Embl):
3318  case NCBI_SEQID(Ddbj):
3319  strip_serial = false;
3320  break;
3321  case NCBI_SEQID(not_set):
3322  case NCBI_SEQID(Local):
3323  case NCBI_SEQID(Other):
3324  case NCBI_SEQID(General):
3325  break;
3326  case NCBI_SEQID(Gibbsq):
3327  case NCBI_SEQID(Gibbmt):
3328  case NCBI_SEQID(Pir):
3329  case NCBI_SEQID(Swissprot):
3330  case NCBI_SEQID(Patent):
3331  case NCBI_SEQID(Prf):
3332  case NCBI_SEQID(Pdb):
3333  case NCBI_SEQID(Gpipe):
3334  case NCBI_SEQID(Tpe):
3335  case NCBI_SEQID(Tpd):
3336  strip_serial = false;
3337  break;
3338  default:
3339  break;
3340  }
3341  }
3342  return strip_serial;
3343 }
3344 
3345 
3347 {
3348  bool change_made = false;
3350  if (seh.IsSet() && seh.GetSet().IsSetClass() &&
3351  entry->GetSet().IsSetSeq_set()) {
3352  CBioseq_set::TClass set_class = seh.GetSet().GetClass();
3353  if (set_class == CBioseq_set::eClass_nuc_prot) {
3354  if (entry->GetSet().GetSeq_set().size() == 1 &&
3355  entry->GetSet().GetSeq_set().front()->IsSeq()) {
3357  eh.ConvertSetToSeq();
3358  if (eh.GetSeq().IsSetDescr()) {
3359  RemoveUnseenTitles(eh.SetSeq());
3360  NormalizeDescriptorOrder(eh.SetSeq().SetDescr());
3361  }
3362  change_made = true;
3363  }
3364  } else if (set_class == CBioseq_set::eClass_genbank ||
3365  set_class == CBioseq_set::eClass_mut_set ||
3366  set_class == CBioseq_set::eClass_pop_set ||
3367  set_class == CBioseq_set::eClass_phy_set ||
3368  set_class == CBioseq_set::eClass_eco_set ||
3369  set_class == CBioseq_set::eClass_wgs_set ||
3370  set_class == CBioseq_set::eClass_gen_prod_set ||
3371  set_class == CBioseq_set::eClass_small_genome_set) {
3374  change_made |= RenormalizeNucProtSets(ch);
3375  }
3376  }
3377  }
3378  return change_made;
3379 }
3380 
TSet SetSet(void) const
#define FOR_EACH_ORGMOD_ON_ORGREF(Itr, Var)
FOR_EACH_ORGMOD_ON_ORGREF EDIT_EACH_ORGMOD_ON_ORGREF.
bool IsStd(void) const
Check if variant Std is selected.
Definition: Auth_list_.hpp:396
CBioseq_Handle –.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
Definition: Seq_feat_.hpp:1351
void SetPacked_int(TPacked_int &v)
Definition: Seq_loc.hpp:966
bool CanGetLocus(void) const
Check if it is safe to call GetLocus method.
Definition: Gene_ref_.hpp:479
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
static bool WGSCleanup(CSeq_entry_Handle entry)
Performs WGS specific cleanup.
Definition: cleanup.cpp:2221
bool s_IsPreprotein(CSeq_feat_Handle fh)
Definition: cleanup.cpp:500
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
Definition: Seq_feat_.hpp:1363
CSeq_entry_Handle GetParentEntry(void) const
Return a handle for the parent seq-entry of the bioseq.
CProt_ref::EProcessed s_ProcessedFromKey(const string &key)
Definition: cleanup.cpp:439
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:167
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
Definition: BioSource_.hpp:485
bool IsSetName(void) const
name used in paper Check if a value has been assigned to Name data member.
Definition: Pubdesc_.hpp:601
void ResetCode(void)
Reset Code data member.
Definition: Cdregion_.cpp:63
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
Definition: Seq_inst_.hpp:802
void RescueProtProductQual(CSeq_feat &feat)
Definition: cleanup.cpp:518
static bool RenormalizeNucProtSets(CSeq_entry_Handle seh)
Convert nuc-prot sets with just one sequence to just the sequence can't be done during the explore ph...
Definition: cleanup.cpp:3346
CSeq_feat_Handle GetGeneWithLocus(const string &locus, bool tag) const
Definition: tse_handle.cpp:708
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:900
CConstRef –.
Definition: ncbiobj.hpp:1192
SeqVector related exceptions.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
Definition: BioSource_.hpp:515
bool IsSetCit(void) const
anything, not parsable Check if a value has been assigned to Cit data member.
Definition: Cit_gen_.hpp:554
refers to unspecified features
Definition: Pubdesc_.hpp:93
Set coding to printable coding (Iupacna or Iupacaa)
static const char * suffix[]
Definition: pcregrep.c:254
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &d) const
GenBank specific info.
Definition: Seqdesc_.hpp:121
const TInst & GetInst(void) const
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
Definition: seq_macros.hpp:308
PDB sequence.
Definition: Seq_id_.hpp:109
void SetDescr(TDescr &v) const
list< CRef< CSubSource > > TSubtype
Definition: BioSource_.hpp:145
not set, code uses one
Definition: Cdregion_.hpp:95
static bool ShouldRemoveAnnot(const CSeq_annot &annot)
static bool IsMinPub(const CPubdesc &pd, bool is_refseq_prot)
Is this a "minimal" pub? (If yes, do not rescue from a Seq-feat.cit)
Definition: cleanup.cpp:2821
CScope & GetScope(void) const
Get scope this handle belongs to.
TPrim & Set(void)
Definition: serialbase.hpp:310
bool Asn2gnbkCompressSpaces(string &val)
ENa_strand GetStrand(void) const
Get the location's strand.
Definition: Seq_loc.cpp:864
All the changes made during cleanup.
bool IsSetDescr(void) const
TMolinfo & SetMolinfo(void)
Select the variant.
Definition: Seqdesc_.cpp:594
void ResetQual(void)
Reset Qual data member.
Definition: Seq_feat_.cpp:136
void SetInst(TInst &v) const
void SetQual(const TQual &value)
Assign a value to Qual data member.
Definition: Gb_qual_.hpp:211
bool ConvertProteinToImp(CSeq_feat_Handle fh)
Definition: cleanup.cpp:477
EProcessed
processing status
Definition: Prot_ref_.hpp:95
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:102
if all from one organism
Definition: Seqdesc_.hpp:116
static bool RemoveDuplicatePubs(CSeq_descr &descr)
Remove duplicate publications.
Definition: cleanup.cpp:2676
size_t ChangeCount() const
Definition: cleanup.cpp:271
vector< EChanges > GetAllChanges() const
Definition: cleanup.cpp:289
bool IsFtable(void) const
Definition: Seq_annot.cpp:177
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:62
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
CBioseq_EditHandle –.
Ignore overlap strand if the source location has mixed/both strand.
Definition: sequence.hpp:578
bool IsSetAuthors(void) const
Check if a value has been assigned to Authors data member.
Definition: Cit_gen_.hpp:594
static bool ConvertPubFeatsToPubDescs(CSeq_entry_Handle seh)
Convert full-length publication features to publication descriptors.
Definition: cleanup.cpp:2783
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:73
unsigned int Uint4
Alias for unsigned int.
Definition: ncbitype.h:121
const TCode & GetCode(void) const
Get the Code member data.
Definition: Cdregion_.hpp:698
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
Definition: Org_ref_.hpp:346
CBioseq_set_EditHandle GetParentBioseq_set(void) const
Get parent bioseq-set edit handle.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:953
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
bool IsSetComment(void) const
any comment on this pub in context Check if a value has been assigned to Comment data member...
Definition: Pubdesc_.hpp:923
CSeq_locs contain each other.
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1021
Tdata & Set(void)
Assign a value to data member.
Definition: Seq_descr_.hpp:173
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
Definition: Seq_loc.cpp:3857
static const string & GetECNumberReplacement(const string &old_ecno)
Return a replaced EC number's replacement.
Definition: Prot_ref.cpp:196
#define fi
static void MoveOneFeatToPubdesc(CSeq_feat_Handle feat, CRef< CSeqdesc > d, CBioseq_Handle b, bool remove_feat=true)
Definition: cleanup.cpp:2748
bool IsAa(void) const
Definition: Bioseq.cpp:350
static bool AddProteinTitle(CBioseq_Handle bsh)
Creates missing protein title descriptor.
Definition: cleanup.cpp:1597
bool seq_mac_is_sorted(Iter first, Iter last, Comp comp)
#define GET_MUTABLE(Var, Fld)
GET_MUTABLE base macro.
Definition: Pub.hpp:55
cofactor, etc associated but not bound
Definition: Seqdesc_.hpp:132
CSeq_annot_Handle GetSeq_annotHandle(const CSeq_annot &annot, EMissing action=eMissing_Default)
Definition: scope.cpp:180
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
static bool TaxonomyLookup(CSeq_entry_Handle seh)
Looks up Org-refs in the Seq-entry.
Definition: cleanup.cpp:1692
Case insensitive compare.
Definition: ncbistr.hpp:1156
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
void Remove(void) const
Remove the feature from Seq-annot.
bool IsSetIs_focus(void) const
To distinguish biological focus. Check if a value has been assigned to Is_focus data member...
Definition: BioSource_.hpp:540
void SetIs_focus(void)
Set NULL data member (assign 'NULL' value to Is_focus data member).
Definition: BioSource_.hpp:558
#define EDIT_EACH_NAME_ON_PROTREF(Itr, Var)
CSeq_entry_Handle GetSeq_entryHandle(CDataLoader *loader, const TBlobId &blob_id, EMissing action=eMissing_Default)
Get Seq-entry handle by its blob-id, with possible loading.
Definition: scope.cpp:113
TSeq SetSeq(void) const
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:520
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
Definition: Seq_feat_.hpp:1304
bool IsSiteRef(const CSeq_feat &sf)
Definition: cleanup.cpp:2809
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
Definition: BioSource_.hpp:435
CSeq_entry_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
CConstRef< CBioseq_set > GetCompleteBioseq_set(void) const
Return the complete bioseq-set object.
const TProduct & GetProduct(void) const
Get the Product member data.
Definition: Seq_feat_.hpp:1061
TGenome GetGenome(void) const
Get the Genome member data.
Definition: BioSource_.hpp:410
void ResetDescr(void)
Reset Descr data member.
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
User-defined methods of the data storage class.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
Definition: BioSource_.hpp:385
void BasicCleanupSeqEntryHandle(CSeq_entry_Handle &seh)
CCleanup(CScope *scope=NULL)
Definition: cleanup.cpp:81
list< string > TSyn
Definition: Org_ref_.hpp:102
TMol GetMol(void) const
Get the Mol member data.
Definition: Seq_inst_.hpp:597
~CCleanup()
Definition: cleanup.cpp:90
bool IsSetXref(void) const
CConstRef< CSeq_feat > GetmRNAforCDS(const CSeq_feat &cds, CScope &scope)
GetmRNAforCDS A function to find a CSeq_feat representing the appropriate mRNA for a given CDS...
Definition: sequence.cpp:1171
Obsolete synonym for some other EC number.
Definition: Prot_ref.hpp:66
bool IsSetPseudo(void) const
Pseudogene. Check if a value has been assigned to Pseudo data member.
Definition: Gene_ref_.hpp:633
CScope & GetScope(void) const
Get scope this handle belongs to.
static CConstRef< CSeq_feat > GetGeneForFeature(const CSeq_feat &feat, CScope &scope)
Definition: cleanup.cpp:1140
TSet GetSet(void) const
ecological sample study
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
Definition: Org_ref_.cpp:87
static const string & GetProteinName(const CProt_ref &prot)
Definition: cleanup.cpp:1299
standard sequencing
Definition: MolInfo_.hpp:124
void BasicCleanup(CPubdesc &pd, bool strip_serial)
#define NCBI_SEQMOL(Type)
Definition: seq_macros.hpp:61
static void SetProteinName(CProt_ref &prot, const string &protein_name, bool append)
Definition: cleanup.cpp:1244
string
Definition: cgiapp.hpp:437
list< string > TEc
Definition: Prot_ref_.hpp:109
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
model evidence for XM records
Definition: Seqdesc_.hpp:135
Int mod(Int i, Int j)
Definition: njn_integer.hpp:67
#define BEGIN_COMMA_END(container)
Map from the feature's location to product.
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
list< CRef< CSeq_interval > > Tdata
CSeq_feat_EditHandle –.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
TTitle & SetTitle(void)
Select the variant.
Definition: Seqdesc_.hpp:1039
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2864
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
Definition: Gene_ref_.hpp:473
overall region (globin locus)
Definition: Seqdesc_.hpp:123
Utility macros and typedefs for exploring NCBI objects from seqset.asn.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
static bool RemoveOrphanLocus_tagGeneXrefs(CSeq_feat &f, CBioseq_Handle bsh)
Removes orphaned locus_tag Gene-xrefs.
Definition: cleanup.cpp:870
const TSeqPos offset(200)
void SetDescr(TDescr &v) const
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
Definition: SubSource_.hpp:308
bool IsSetName(void) const
Protein name. Check if a value has been assigned to Name data member.
Definition: Prot_ref_.hpp:352
void SetTech(TTech value)
Assign a value to Tech data member.
Definition: MolInfo_.hpp:490
const NCBI_NS_NCBI::CEnumeratedTypeValues *ENUM_METHOD_NAME() ENa_strand(void)
Access to ENa_strand's attributes (values, names) as defined in spec.
void ExtendedCleanupSeqEntryHandle(CSeq_entry_Handle &seh)
static bool SetFrameFromLoc(CCdregion &cdregion, const CSeq_loc &loc, CScope &scope)
Chooses best frame based on location 1.
Definition: cleanup.cpp:1115
SAnnotSelector –.
void SetSerial_number(TSerial_number value)
Assign a value to Serial_number data member.
Definition: Cit_gen_.hpp:852
void SetDescr(TDescr &v) const
type of molecule
Definition: Seqdesc_.hpp:111
bool IsSetId(void) const
#define NULL
Definition: ncbistd.hpp:225
static string GetDescription(EChanges e)
Definition: cleanup.cpp:313
static bool SetMolinfoBiomol(CBioseq_Handle seq, CMolInfo::EBiomol biomol)
Sets MolInfo::biomol for a sequence.
Definition: cleanup.cpp:1537
TSeqPos size(void) const
Definition: seq_vector.hpp:291
bool IsSetDbxref(void) const
Support for xref to other databases. Check if a value has been assigned to Dbxref data member...
Definition: Seq_feat_.hpp:1279
static bool SetMolinfoTech(CBioseq_Handle seq, CMolInfo::ETech tech)
Sets MolInfo::tech for a sequence.
Definition: cleanup.cpp:1513
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1343
static bool IsECNumberSplit(const string &old_ecno)
Definition: Prot_ref.cpp:213
bool IsSetLineage(void) const
Lineage with semicolon separators. Check if a value has been assigned to Lineage data member...
Definition: OrgName_.hpp:828
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
Definition: MolInfo_.hpp:440
const TMod & GetMod(void) const
Get the Mod member data.
Definition: OrgName_.hpp:815
#define kEmptyStr
Definition: ncbistr.hpp:120
list< CRef< CSeq_annot > > TAnnot
Definition: Bioseq_.hpp:97
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
Definition: Seq_feat_.hpp:1009
bool IsSetNames(void) const
Check if a value has been assigned to Names data member.
Definition: Auth_list_.hpp:456
const TName & GetName(void) const
Get the Name member data.
Definition: Prot_ref_.hpp:364
static bool SetBestFrame(CSeq_feat &cds, CScope &scope)
Translates coding region and selects best frame (without stops, or longest)
Definition: cleanup.cpp:1056
SWISSPROT specific info.
Definition: Seqdesc_.hpp:125
void SetDescr(CSeq_descr &value)
Definition: Seq_entry.cpp:134
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
Definition: Bioseq_.hpp:344
CSeq_entry_CI –.
CScope & GetScope(void) const
Get scope this handle belongs to.
bool s_FirstPubMatchesSecond(const CPubdesc &pd1, const CPubdesc &pd2)
Definition: cleanup.cpp:2699
void SetReftype(TReftype value)
Assign a value to Reftype data member.
Definition: Pubdesc_.hpp:994
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
Seq_descr.hpp: User-defined methods of the data storage class.
Definition: Seq_descr.hpp:54
virtual const CSeq_loc & GetLocation(void) const
date of last update
Definition: Seqdesc_.hpp:129
static bool FixGeneXrefSkew(CSeq_entry_Handle seh)
Examine all genes and gene xrefs in the Seq-entry.
Definition: cleanup.cpp:3198
CConstRef< CBioseq_set > GetParentSet(void) const
Definition: Bioseq_set.cpp:312
const TKey & GetKey(void) const
Get the Key member data.
Definition: Imp_feat_.hpp:247
void SetPub(TPub &value)
Assign a value to Pub data member.
Definition: Pubdesc_.cpp:72
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:893
const TId & GetId(void) const
Get the Id member data.
Definition: Bioseq_.hpp:280
#define NPOS
Definition: ncbistr.hpp:130
CRef< CScope > m_Scope
Definition: cleanup.hpp:428
bool CanGetLocus_tag(void) const
Check if it is safe to call GetLocus_tag method.
Definition: Gene_ref_.hpp:739
int i
void AddProteinFeature(const CBioseq &seq, const string &protein_name, const CSeq_feat &cds, CScope &scope)
AddProteinFeature A function to create a protein feature with the specified protein name...
Definition: feature.cpp:3690
void SetFrame(TFrame value)
Assign a value to Frame data member.
Definition: Cdregion_.hpp:526
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
Definition: Seq_loc.cpp:2567
CRef< objects::CSeq_id > GetNewProteinId(objects::CSeq_entry_Handle seh, objects::CBioseq_Handle bsh)
Definition: cleanup.cpp:1836
bool IsSetXref(void) const
Cite other relevant features. Check if a value has been assigned to Xref data member.
Definition: Seq_feat_.hpp:1254
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
Definition: Cdregion_.hpp:495
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1082
const TBiosrc & GetBiosrc(void) const
Get the variant data.
CScope & GetScope(void) const
Get scope this handle belongs to.
EChangeType
Definition: cleanup.cpp:74
First CSeq_loc contains second.
static bool AddPartialToProteinTitle(CBioseq &bioseq)
Adjusts protein title to reflect partialness.
Definition: cleanup.cpp:2001
bool IsSetInst(void) const
The sequence data. Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:314
#define FIELD_IS(Var, Fld)
Generic FIELD macros.
bool IsSetClass(void) const
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
virtual void Init(void)
Definition: taxon3.cpp:66
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Gene_ref_.hpp:658
vector< string > GetAllDescriptions() const
Definition: cleanup.cpp:301
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:612
#define FIELD_EQUALS(Var, Fld, Value)
FIELD_EQUALS base macro.
void SetLineage(const TLineage &value)
Assign a value to Lineage data member.
Definition: OrgName_.hpp:849
const TId & GetId(void) const
TSet ConvertSeqToSet(TClass set_class=CBioseq_set::eClass_not_set) const
Convert the entry from Bioseq to Bioseq-set.
Last occurrence.
Definition: ncbistr.hpp:1855
const TPub & GetPub(void) const
Get the Pub member data.
Definition: Pubdesc_.hpp:583
static bool s_SeqDescLessThan(const CRef< CSeqdesc > &desc1, const CRef< CSeqdesc > &desc2)
Definition: cleanup.cpp:2390
bool IsSetSyn(void) const
Synonyms for taxname or common. Check if a value has been assigned to Syn data member.
Definition: Org_ref_.hpp:476
bool AddSeqdesc(CSeqdesc &v) const
SeqMap related exceptions.
const TPub & GetPub(void) const
Get the variant data.
Definition: Seqdesc_.cpp:356
bool IsSetDescr(void) const
Definition: Seq_entry.cpp:106
a title for this sequence
Definition: Seqdesc_.hpp:115
static EECNumberStatus GetECNumberStatus(const string &ecno)
Determine an EC number's validity and specificity.
Definition: Prot_ref.cpp:182
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seqdesc_.hpp:903
static bool SetCDSPartialsByFrameAndTranslation(CSeq_feat &cds, CScope &scope)
1.
Definition: cleanup.cpp:1331
bool IsAa(void) const
bool IsSource(void) const
Check if variant Source is selected.
Definition: Seqdesc_.hpp:1190
static bool RescueSiteRefPubs(CSeq_entry_Handle seh)
Rescue pubs from Site-ref features.
Definition: cleanup.cpp:2852
list< CRef< CSeq_entry > > TSeq_set
TXref & SetXref(void)
Assign a value to Xref data member.
Definition: Seq_feat_.hpp:1272
static bool RemoveOrphanLocusGeneXrefs(CSeq_feat &f, CBioseq_Handle bsh)
Removes orphaned locus Gene-xrefs.
Definition: cleanup.cpp:823
bool IsSetGcode(void) const
Genetic code (see CdRegion). Check if a value has been assigned to Gcode data member.
Definition: OrgName_.hpp:868
static bool IsGeneXrefUnnecessary(const CSeq_feat &sf, CScope &scope, const CGene_ref &gene_xref)
Calculates whether a Gene-xref is unnecessary (because it refers to the same gene as would be calcula...
Definition: cleanup.cpp:695
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:101
CConstRef< CCleanupChange > BasicCleanup(CSeq_entry &se, Uint4 options=0)
Definition: cleanup.cpp:119
sequencing method
Definition: Seqdesc_.hpp:113
#define FOR_EACH_SEQDESC_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQDESC_ON_BIOSEQ EDIT_EACH_SEQDESC_ON_BIOSEQ.
Definition: seq_macros.hpp:218
bool IsSetDiv(void) const
GenBank division code. Check if a value has been assigned to Div data member.
Definition: OrgName_.hpp:962
static bool UpdateECNumbers(CProt_ref::TEc &ec_num_list)
Update EC numbers.
Definition: cleanup.cpp:1451
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
Definition: sequence.cpp:3582
static int s_SeqDescToOrdering(const CRef< CSeqdesc > &desc)
Definition: cleanup.cpp:2377
static bool ConvertSrcFeatsToSrcDescs(CSeq_entry_Handle seh)
Convert full-length source features to source descriptors.
Definition: cleanup.cpp:3157
#define FOR_EACH_SEQDESC_ON_SEQSET(Itr, Var)
FOR_EACH_SEQDESC_ON_SEQSET EDIT_EACH_SEQDESC_ON_SEQSET.
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
const TMod & GetMod(void) const
Get the Mod member data.
Definition: Org_ref_.hpp:438
list< CRef< CPub > > Tdata
Definition: Pub_equiv_.hpp:90
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2762
TGen & SetGen(void)
Select the variant.
Definition: Pub_.cpp:173
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
const TStr & GetStr(void) const
Get the variant data.
Definition: Auth_list_.hpp:442
CSeqFeatXref –.
Definition: SeqFeatXref.hpp:65
#define NCBI_SEQID(Type)
Convenience macros for NCBI objects
static SIZE_TYPE s_TitleEndsInOrganism(const string &sTitle, const string &sOrganism, SIZE_TYPE *out_piOrganellePos)
Definition: cleanup.cpp:1913
void ResetDescr(void)
Reset Descr data member.
Definition: Bioseq_.cpp:60
static bool ShouldStripPubSerial(const CBioseq &bs)
Definition: cleanup.cpp:3299
bool CopyFeaturePartials(CSeq_feat &dst, const CSeq_feat &src)
CopyFeaturePartials A function to copy the start and end partialness from one feature to another...
Definition: feature.cpp:3512
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
Definition: Seq_loc.cpp:3204
CRef< CSeq_entry > AddProtein(const CSeq_feat &cds, CScope &scope)
Definition: cleanup.cpp:1770
PRF specific information.
Definition: Seqdesc_.hpp:130
string s_KeyFromProcessed(CProt_ref::EProcessed processed)
Definition: cleanup.cpp:454
void SetName(const TName &value)
Assign a value to Name data member.
Definition: SubSource_.hpp:348
ECompare
publication applies to this seq
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
Definition: BioSource_.hpp:533
CBioseq_set_Handle –.
refers to specified features
Definition: Pubdesc_.hpp:94
bool CleanVisStringJunk(string &str, bool allow_ellipses)
static bool OkToPromoteNpPub(const CPubdesc &pd)
Some pubs should not be promoted to nuc-prot set from sequence.
Definition: cleanup.cpp:2738
CBioseq_Handle GetBioseqFromSeqLoc(const CSeq_loc &loc, CScope &scope, CScope::EGetBioseqFlag flag=CScope::eGetBioseq_Loaded)
Retrieve the Bioseq Handle from a location.
Definition: sequence.cpp:229
bool RefersToSameGene(const CGene_ref &xref) const
Definition: Gene_ref.cpp:91
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
Definition: Seq_loc.cpp:897
bool IsSetVolume(void) const
Check if a value has been assigned to Volume data member.
Definition: Cit_gen_.hpp:683
bool IsImp(void) const
Check if variant Imp is selected.
CSeq_entry_EditHandle GetSeq_entryEditHandle(const CSeq_entry &entry)
Definition: scope.cpp:195
void BasicCleanupBioseqSetHandle(CBioseq_set_Handle &bssh)
static bool ExtendToStopIfShortAndNotPartial(CSeq_feat &f, CBioseq_Handle bsh, bool check_for_stop=true)
Extends a coding region up to 50 nt.
Definition: cleanup.cpp:1210
void SetOrigin(TOrigin value)
Assign a value to Origin data member.
Definition: BioSource_.hpp:466
static CRef< CBioSource > BioSrcFromFeat(const CSeq_feat &f)
Get BioSource from feature to use for source descriptor.
Definition: cleanup.cpp:3124
bool IsSetMod(void) const
Unstructured modifiers. Check if a value has been assigned to Mod data member.
Definition: Org_ref_.hpp:426
TOrigin GetOrigin(void) const
Get the Origin member data.
Definition: BioSource_.hpp:460
TMod & SetMod(void)
Assign a value to Mod data member.
Definition: Org_ref_.hpp:444
Tdata & Set(void)
Assign a value to data member.
const TCdregion & GetCdregion(void) const
Get the variant data.
TMod & SetMod(void)
Assign a value to Mod data member.
Definition: OrgName_.hpp:821
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Definition: Seq_loc.hpp:437
CFeat_CI –.
Definition: feat_ci.hpp:63
bool IsTitle(void) const
Check if variant Title is selected.
Definition: Seqdesc_.hpp:1026
whole genome shotgun project
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
map location of this sequence
Definition: Seqdesc_.hpp:119
CSeq_feat_EditHandle TakeFeat(const CSeq_feat_EditHandle &handle) const
bool RetranslateCDS(const CSeq_feat &cds, CScope &scope)
RetranslateCDS A function to replace the protein Bioseq pointed to by cds.product with the current tr...
Definition: feature.cpp:3616
void SetCit(const TCit &value)
Assign a value to Cit data member.
Definition: Cit_gen_.hpp:575
TBiomol GetBiomol(void) const
Get the Biomol member data.
Definition: MolInfo_.hpp:434
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
Definition: Seq_loc.cpp:3262
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
Definition: Org_ref_.hpp:358
TSeqPos GetStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:945
void SetOrg(TOrg &value)
Assign a value to Org data member.
Definition: BioSource_.cpp:108
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
Definition: Imp_feat_.hpp:235
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
Definition: sequence.cpp:863
static bool RemoveUnnecessaryGeneXrefs(CSeq_feat &f, CScope &scope)
Removes unnecessary Gene-xrefs.
Definition: cleanup.cpp:730
Check if seq-locs are overlapping.
conceptual transl. supplied by author
Definition: MolInfo_.hpp:136
a numbering system
Definition: Seqdesc_.hpp:118
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
list< CRef< CArticleId > > Tdata
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void ResetComment(void)
Reset Comment data member.
Definition: Seq_feat_.cpp:99
CSeqVector –.
Definition: seq_vector.hpp:64
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
#define EDIT_EACH_SEQDESC_ON_BIOSEQ(Itr, Var)
Definition: seq_macros.hpp:221
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
Definition: static_map.hpp:103
2nd contained within 1st extremes
static bool RemovePseudoProduct(CSeq_feat &cds, CScope &scope)
Removes protein product from pseudo coding region.
Definition: cleanup.cpp:2187
void SetVal(const TVal &value)
Assign a value to Val data member.
Definition: Gb_qual_.hpp:251
bool IsSetMgcode(void) const
Mitochondrial genetic code. Check if a value has been assigned to Mgcode data member.
Definition: OrgName_.hpp:915
CConstRef< CSeq_annot > GetCompleteSeq_annot(void) const
Complete and return const reference to the current seq-annot.
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
CSeq_entry_Handle –.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
Definition: Org_ref_.hpp:501
TSeq ConvertSetToSeq(void) const
Do the same as CollapseSet() when sub-entry is of type bioseq.
static bool FindMatchingLocusGene(CSeq_feat &f, const CGene_ref &gene_xref, CBioseq_Handle bsh)
Detects gene features with matching locus.
Definition: cleanup.cpp:800
TMuid & SetMuid(void)
Select the variant.
Definition: Pub_.hpp:615
Template structure SStaticPair is a simplified replacement of STL pair<>. Main reason for introducing this...
Definition: static_set.hpp:59
a reference to the publication
Definition: Seqdesc_.hpp:122
bool IsSet(void) const
Check if variant Set is selected.
Definition: Seq_entry_.hpp:263
static bool x_MergeDupOrgNames(COrgName &on1, const COrgName &add)
Definition: cleanup.cpp:2970
list< string > TMod
Definition: Org_ref_.hpp:100
Tdata & Set(void)
Assign a value to data member.
static int NextCodonState(int state, unsigned char ch)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5275
const TDescr & GetDescr(void) const
static int match(register const unsigned char *eptr, register const uschar *ecode, const unsigned char *mstart, int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, int flags, unsigned int rdepth)
Definition: pcre_exec.c:431
void SetChanged(EChanges e)
Definition: cleanup.cpp:283
bool HasMod(const COrg_ref &org, const string &mod)
Definition: cleanup.cpp:3012
bool IsSetData(void) const
user defined object
Definition: Seqdesc_.hpp:124
TPseudo GetPseudo(void) const
Get the Pseudo member data.
Definition: Seq_feat_.hpp:1323
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const CSeq_descr & GetDescr(void) const
Definition: Seq_entry.cpp:120
CStaticPairArrayMap< CSeqdesc::E_Choice, int > TSeqdescOrderMap
Definition: cleanup.cpp:2373
const TRna & GetRna(void) const
Get the variant data.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
Definition: BioSource_.hpp:527
const TAnnot & GetAnnot(void) const
Get the Annot member data.
Definition: Bioseq_.hpp:356
static bool SeqLocExtend(CSeq_loc &loc, size_t pos, CScope &scope)
Extends a location to the specified position.
Definition: cleanup.cpp:895
numerical value
Definition: Na_strand.hpp:63
bool IsSetDescr(void) const
TClass GetClass(void) const
Pubdesc.hpp: User-defined methods of the data storage class.
Definition: Pubdesc.hpp:53
const TGene & GetGene(void) const
Get the variant data.
void ResetXref(void)
Reset Xref data member.
Definition: Seq_feat_.cpp:182
bool HasExceptionText(const string &exception_text) const
Returns whether or not the given exception_text is set for this feature.
Definition: Seq_feat.cpp:426
CSeq_annot_Handle –.
static bool SetGenePartialByLongestContainedFeature(CSeq_feat &gene, CScope &scope)
Set partialness of gene to match longest feature contained in gene.
Definition: cleanup.cpp:1476
const Tdata & Get(void) const
Get the member data.
Definition: Pub_equiv_.hpp:166
static bool RemoveNcbiCleanupObject(CSeq_entry &seq_entry)
Removes NcbiCleanup User Objects in the Seq-entry.
Definition: cleanup.cpp:1642
const TDescr & GetDescr(void) const
list< CRef< CPub > > TPub
Definition: Pub_set_.hpp:159
static string GetOrganelleByGenome(unsigned int genome)
Definition: BioSource.cpp:213
const TNames & GetNames(void) const
Get the Names member data.
Definition: Auth_list_.hpp:470
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
Definition: Seq_feat_.hpp:1049
CSeq_entry_Handle –.
static bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Checks to see if a feature is pseudo.
Definition: cleanup.cpp:1175
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &v) const
static bool PubAlreadyInSet(const CPubdesc &pd, const CSeq_descr &descr)
Definition: cleanup.cpp:2714
#define EDIT_EACH_FEATURE_ON_ANNOT
Definition: seq_macros.hpp:434
PDB specific information.
Definition: Seqdesc_.hpp:131
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data mem...
Definition: Gene_ref_.hpp:733
const CGene_ref * GetGeneXref(void) const
get gene (if present) from Seq-feat.xref list
The Object manager core.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
Definition: Seq_feat_.hpp:885
void SetScope(CScope &scope)
Main methods.
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
TPmid & SetPmid(void)
Select the variant.
Definition: Pub_.hpp:690
static string Join(const list< string > &arr, const CTempString delim)
Join strings using the specified delimiter.
Definition: ncbistr.cpp:3610
Gb_qual.hpp: User-defined methods of the data storage class.
Definition: Gb_qual.hpp:60
const TDb & GetDb(void) const
Get the Db member data.
Definition: Org_ref_.hpp:463
bool IsIupacaa(void) const
Check if variant Iupacaa is selected.
Definition: Seq_data_.hpp:524
void AddScope(CScope &scope, TPriority pri=kPriority_Default)
Add the scope's datasources as a single group with the given priority. All data sources (data loaders ...
Definition: scope.cpp:473
void ResetStrand(void)
Reset the strand on this location.
Definition: Seq_loc.cpp:5063
TSource & SetSource(void)
Select the variant.
Definition: Seqdesc_.cpp:572
#define EDIT_EACH_SEQANNOT_ON_BIOSEQ(Itr, Var)
Definition: seq_macros.hpp:266
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:286
void ResetLocation(void)
Reset Location data member.
Definition: Seq_feat_.cpp:122
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true)
Translate a string using a specified genetic code.
Definition: sequence.cpp:3848
bool CanGetGenome(void) const
Check if it is safe to call GetGenome method.
Definition: BioSource_.hpp:391
void Remove(void) const
Remove current annot.
void SetData(TData &value)
Assign a value to Data data member.
void BasicCleanupBioseqHandle(CBioseq_Handle &bsh)
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1084
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
static vector< CConstRef< CPub > > GetCitationList(CBioseq_Handle bsh)
Get list of pubs that can be used for citations for Seq-feat on a Bioseq-handle.
Definition: cleanup.cpp:2602
bool IsStr(void) const
Check if variant Str is selected.
Definition: Auth_list_.hpp:436
PRF SEQDB.
Definition: Seq_id_.hpp:108
Definition: inftrees.h:24
TInst_Length GetInst_Length(void) const
char GetCodonResidue(int state) const
TChangeBits m_Changes
bool IsSetPub(void) const
The citation(s). Check if a value has been assigned to Pub data member.
Definition: Pubdesc_.hpp:571
CSeq_entry_EditHandle AttachEntry(CSeq_entry &entry, int index=-1) const
Attach an existing seq-entry.
bool IsProt(void) const
Check if variant Prot is selected.
bool IsMl(void) const
Check if variant Ml is selected.
Definition: Auth_list_.hpp:416
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_loc_.hpp:475
static bool RemoveUnseenTitles(CSeq_entry_EditHandle::TSeq seq)
Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in ...
Definition: cleanup.cpp:2422
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
void ResetProduct(void)
Reset Product data member.
Definition: Seq_feat_.cpp:105
static bool SetGeneticCodes(CBioseq_Handle bsh)
Sets genetic codes for coding regions on Bioseq-Handle.
Definition: cleanup.cpp:1864
static CRef< CCleanupChange > makeCleanupChange(Uint4 options)
Definition: cleanup.cpp:105
bool IsMix(void) const
Check if variant Mix is selected.
Definition: Seq_loc_.hpp:552
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
Definition: Seq_inst_.hpp:578
CConstRef< CSeq_feat > GetOverlappingGene(const CSeq_loc &loc, CScope &scope, ETransSplicing eTransSplicing=eTransSplicing_Auto)
Definition: sequence.cpp:1265
int GetId(void) const
CScope –.
Definition: scope.hpp:90
bool IsChanged(EChanges e) const
Definition: cleanup.cpp:277
static void GetPubdescLabels(const CPubdesc &pd, vector< int > &pmids, vector< int > &muids, vector< int > &serials, vector< string > &published_labels, vector< string > &unpublished_labels)
For Publication Citations Get labels for a pubdesc.
Definition: cleanup.cpp:2535
static bool AddGenBankWrapper(CSeq_entry_Handle seh)
Add GenBank Wrapper Set.
Definition: cleanup.cpp:2460
TTech GetTech(void) const
Get the Tech member data.
Definition: MolInfo_.hpp:484
const TLocus & GetLocus(void) const
Get the Locus member data.
Definition: Gene_ref_.hpp:485
const TSyn & GetSyn(void) const
Get the Syn member data.
Definition: Org_ref_.hpp:488
static bool RemoveNonsuppressingGeneXrefs(CSeq_feat &f)
Removes non-suppressing Gene-xrefs.
Definition: cleanup.cpp:776
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
source of materials, includes Org-ref
Definition: Seqdesc_.hpp:133
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1112
void SetInt(TInt &v)
Definition: Seq_loc.hpp:965
#define FOR_EACH_PUB_ON_PUBDESC(Itr, Var)
FOR_EACH_PUB_ON_PUBDESC EDIT_EACH_PUB_ON_PUBDESC.
Definition: pub_macros.hpp:127
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
Definition: Seq_loc.cpp:840
static const CTrans_table & GetTransTable(int id)
Definition: Seq_entry.hpp:55
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
Definition: Org_ref_.hpp:513
static bool ClearInternalPartials(CSeq_loc &loc, bool is_first=true, bool is_last=true)
Clear internal partials.
Definition: cleanup.cpp:1371
const TSet & GetSet(void) const
Get the variant data.
Definition: Seq_entry_.cpp:124
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void BasicCleanupSeqFeat(CSeq_feat &sf)
static const char * str(char *buf, int n)
Definition: stats.c:84
info on the molecule and techniques
Definition: Seqdesc_.hpp:134
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
const TIupacaa & GetIupacaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:530
int len
DEFINE_STATIC_ARRAY_MAP(TSeqdescOrderMap, sc_SeqdescOrderMap, sc_seqdesc_order_map)
TBase::const_iterator const_iterator
Definition: static_map.hpp:109
TGcode GetGcode(void) const
Get the Gcode member data.
Definition: OrgName_.hpp:887
void SetComment(const TComment &value)
Assign a value to Comment data member.
Definition: Seq_feat_.hpp:1030
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
static bool NormalizeDescriptorOrder(CSeq_descr &descr)
Normalize Descriptor Order on a specific Seq-entry.
Definition: cleanup.cpp:2395
static const char *const sm_ChangeDesc[eNumberofChangeTypes+1]
list< CRef< COrgMod > > TMod
Definition: OrgName_.hpp:332
const TReply & GetReply(void) const
Get the Reply member data.
const CSeq_id * GetId(void) const
Get the id of the location; returns NULL if it has multiple ids or no id at all.
Definition: Seq_loc.hpp:923
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
void ExtendedCleanup(CSeq_entry_Handle &seh)
xref to other databases
Definition: Seqdesc_.hpp:126
virtual CRef< CTaxon3_reply > SendOrgRefList(const vector< CRef< COrg_ref > > &list)
Definition: taxon3.cpp:161
void SetMgcode(TMgcode value)
Assign a value to Mgcode data member.
Definition: OrgName_.hpp:943
a more extensive comment
Definition: Seqdesc_.hpp:117
CScope & GetScope(void) const
Get scope this handle belongs to.
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
Definition: sequence.cpp:4008
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3072
date entry first created/released
Definition: Seqdesc_.hpp:128
CSeq_loc_Mapper –.
else result
Definition: token2.c:20
void SetGcode(TGcode value)
Assign a value to Gcode data member.
Definition: OrgName_.hpp:896
bool IsSetDescr(void) const
const TPub & GetPub(void) const
Get the variant data.
const TLineage & GetLineage(void) const
Get the Lineage member data.
Definition: OrgName_.hpp:840
bool IsTransSpliced(const CSeq_feat &feat)
Definition: cleanup.cpp:1130
conceptual translation
Definition: MolInfo_.hpp:131
list< CRef< CSeq_loc > > Tdata
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
PIR specific info.
Definition: Seqdesc_.hpp:120
void BasicCleanupSeqFeatHandle(CSeq_feat_Handle &sfh)
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
bool IsSetPages(void) const
Check if a value has been assigned to Pages data member.
Definition: Cit_gen_.hpp:763
#define FIELD_CHAIN_OF_2_IS_SET(Var, Fld1, Fld2)
FIELD_CHAIN_OF_2_IS_SET.
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:129
void SetScope(CScope *scope)
Definition: cleanup.cpp:95
void SetProt(TProt &v)
const CGene_ref * GetGeneXref(void) const
See related function in util/feature.hpp.
Definition: Seq_feat.cpp:169
#define NCBI_COMPLETENESS(Type)
Definition: seq_macros.hpp:130
static bool AreBioSourcesMergeable(const CBioSource &src1, const CBioSource &src2)
Definition: cleanup.cpp:2921
bool IsNa(void) const
#define _ASSERT
bool IsSetDesc(void) const
Description (instead of name). Check if a value has been assigned to Desc data member.
Definition: Prot_ref_.hpp:377
vector< CRef< CDbtag > > TDb
Definition: Org_ref_.hpp:101
const TDiv & GetDiv(void) const
Get the Div member data.
Definition: OrgName_.hpp:974
TProcessed GetProcessed(void) const
Get the Processed member data.
Definition: Prot_ref_.hpp:517
a name for this sequence
Definition: Seqdesc_.hpp:114
void GetSourceDescriptors(const CSeq_entry &se, vector< const CSeqdesc * > &src_descs)
Definition: cleanup.cpp:1674
TMgcode GetMgcode(void) const
Get the Mgcode member data.
Definition: OrgName_.hpp:934
bool IsSetFig(void) const
Figure in paper. Check if a value has been assigned to Fig data member.
Definition: Pubdesc_.hpp:641
const CSeqFeatData & GetData(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
Definition: Seq_id.cpp:277
static bool EqualNocase(const CTempString str, SIZE_TYPE pos, SIZE_TYPE n, const char *pattern)
Case-insensitive equality of a substring with a pattern.
Definition: ncbistr.hpp:5221
static bool AddMissingMolInfo(CBioseq &seq, bool is_product)
Adds missing MolInfo descriptor to sequence.
Definition: cleanup.cpp:1558
namespace ncbi::objects::
Definition: Seq_feat.hpp:55
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:70
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
Definition: mapped_feat.hpp:71
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:1564
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
Definition: OrgName_.hpp:803
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:326
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5291
static bool ExtendToStopCodon(CSeq_feat &f, CBioseq_Handle bsh, size_t limit, CCdregion::TFrame frame=CCdregion::eFrame_not_set)
Extends a feature up to limit nt to a stop codon, or to the end of the sequence if limit == 0 (partia...
Definition: cleanup.cpp:927
CBioseq_CI –.
Definition: bioseq_ci.hpp:68
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:336
string GetRnaProductName(void) const
Definition: RNA_ref.cpp:145
CRef< CSeqdesc > RemoveSeqdesc(const CSeqdesc &d) const
bool IsGene(void) const
Check if variant Gene is selected.
TSyn & SetSyn(void)
Assign a value to Syn data member.
Definition: Org_ref_.hpp:494
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
Definition: Seq_feat_.hpp:1291
static bool MoveProteinSpecificFeats(CSeq_entry_Handle seh)
Moves protein-specific features from nucleotide sequences in the Seq-entry to the appropriate protein...
Definition: cleanup.cpp:675
#define CLEANUP_SETUP
Definition: cleanup.cpp:114
CConstRef< CCleanupChange > ExtendedCleanup(CSeq_entry &se, Uint4 options=0)
Cleanup a Seq-entry.
Definition: cleanup.cpp:229
CBioseq_set_EditHandle –.
bool IsSetDb(void) const
IDs in taxonomic or culture databases. Check if a value has been assigned to Db data member.
Definition: Org_ref_.hpp:451
const TImp & GetImp(void) const
Get the variant data.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
Definition: scope.cpp:188
EMBL specific information.
Definition: Seqdesc_.hpp:127
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
list< CRef< CAuthor > > TStd
Definition: Auth_list_.hpp:170
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
Definition: MolInfo_.hpp:459
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1266
void SetMix(TMix &v)
Definition: Seq_loc.hpp:969
viral segments or mitochondrial minicircles
const TMl & GetMl(void) const
Get the variant data.
Definition: Auth_list_.hpp:422
CSeq_feat_Handle –.
just a nucleic acid
Definition: Seq_inst_.hpp:113
bool IsPartialStop(ESeqLocExtremes ext) const
Definition: Seq_loc.cpp:3233
E_Choice Which(void) const
Which variant is currently selected.
SStaticPair< CSeqdesc::E_Choice, int > TSeqdescOrderElem
Definition: cleanup.cpp:2343
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
bool IsSetNum(void) const
Numbering from paper. Check if a value has been assigned to Num data member.
Definition: Pubdesc_.hpp:681
static bool MergeDupBioSources(CSeq_descr &descr)
Definition: cleanup.cpp:3065
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
vector< TFeatScore > TFeatScores
Definition: sequence.hpp:351
#define NCBI_ORGMOD(Type)
COrgMod definitions.
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Prot_ref_.hpp:389
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
Definition: Bioseq_.hpp:293
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
static bool x_MergeDupOrgRefs(COrg_ref &org1, const COrg_ref &add)
Definition: cleanup.cpp:3026
TPub & SetPub(void)
Select the variant.
Definition: Seqdesc_.cpp:362
bool IsSetProcessed(void) const
Check if a value has been assigned to Processed data member.
Definition: Prot_ref_.hpp:492
#define FIELD_IS_SET(Var, Fld)
FIELD_IS_SET base macro.
void SetComment(const TComment &value)
Assign a value to Comment data member.
Definition: Pubdesc_.hpp:944
void SetPartialStop(bool val, ESeqLocExtremes ext)
Definition: Seq_loc.cpp:3295
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1118
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:53
TName & SetName(void)
Assign a value to Name data member.
Definition: Prot_ref_.hpp:370
const TMolinfo & GetMolinfo(void) const
Get the variant data.
Definition: Seqdesc_.cpp:588
const TStd & GetStd(void) const
Get the variant data.
Definition: Auth_list_.hpp:402
bool IsSet(void) const
bool IsSuppressed(void) const
Definition: Gene_ref.cpp:75
static bool Equal(const CTempString str, SIZE_TYPE pos, SIZE_TYPE n, const char *pattern, ECase use_case=eCase)
Test for equality of a substring with a pattern.
Definition: ncbistr.hpp:5247
bool AdjustForCDSPartials(const CSeq_feat &cds, CSeq_entry_Handle seh)
AdjustForCDSPartials A function to make all of the necessary related changes to a Seq-entry after the...
Definition: feature.cpp:3566
#define RAW_FIELD_IS_EMPTY_OR_UNSET(Var, Fld)
RAW_FIELD_IS_EMPTY_OR_UNSET macro.
bool IsSetJournal(void) const
Check if a value has been assigned to Journal data member.
Definition: Cit_gen_.hpp:662
vector< CSeq_id_Handle > TId
static void s_RemoveOrgFromEndOfProtein(CBioseq &seq, string taxname)
Definition: cleanup.cpp:1966
.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:55
Definition: Dbtag.hpp:52
bool IsSetInst(void) const
CConstRef< CSeq_feat > GetOverlappingCDS(const CSeq_loc &loc, CScope &scope)
Definition: sequence.cpp:1321
Definition: set.hpp:44
static bool MoveFeatToProtein(CSeq_feat_Handle fh)
Moves one feature from nucleotide bioseq to the appropriate protein sequence.
Definition: cleanup.cpp:546
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
nuc acid and coded proteins
Definition: Bioseq_set_.hpp:99
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
int GetGenCode(int def=1) const
Definition: BioSource.cpp:73
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
const TProt & GetProt(void) const
Get the variant data.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
Definition: Gene_ref_.hpp:745
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
Definition: Seq_inst_.cpp:130
bool IsSetCode(void) const
Genetic code used. Check if a value has been assigned to Code data member.
Definition: Cdregion_.hpp:686
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:64
bool AddSeqdesc(CSeqdesc &d) const
genomic products, chrom+mRNA+protein
void BasicCleanupSeqAnnotHandle(CSeq_annot_Handle &sah)
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:98
TDb & SetDb(void)
Assign a value to Db data member.
Definition: Org_ref_.hpp:469
const TOrg & GetOrg(void) const
Get the Org member data.
Definition: BioSource_.hpp:497
bool IsSetSeq_data(void) const
The sequence. Check if a value has been assigned to Seq_data data member.
Definition: Seq_inst_.hpp:790
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
Definition: Seq_loc.cpp:5038
void ResetName(void)
Reset Name data member.
Definition: Prot_ref_.cpp:63
bool IsMolinfo(void) const
Check if variant Molinfo is selected.
Definition: Seqdesc_.hpp:1196
void s_GetAuthorsString(string *out_authors, const CAuth_list &auth_list)
Definition: cleanup.cpp:2472
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Pubdesc_.hpp:935
static bool RemoveDupBioSource(CSeq_descr &descr)
Remove duplicate biosource descriptors.
Definition: cleanup.cpp:3094
bool AdjustFeaturePartialFlagForLocation(CSeq_feat &new_feat)
AdjustFeaturePartialFlagForLocation A function to ensure that Seq-feat.partial is set if either end o...
Definition: feature.cpp:3489
static string NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:4277
CBioSource::TGenome TBIOSOURCE_GENOME
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:756
void SetGenome(TGenome value)
Assign a value to Genome data member.
Definition: BioSource_.hpp:416
bool AddSeqdesc(CSeqdesc &d) const
const TPrim & Get(void) const
Definition: serialbase.hpp:306
TSeq GetSeq(void) const
void Rewind(void)
Definition: feat_ci.hpp:209
CCdregion –.
Definition: Cdregion.hpp:65
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
Definition: Seq_feat_.hpp:1100
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:897
void SetCode(TCode &value)
Assign a value to Code data member.
Definition: Cdregion_.cpp:68
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
User-defined methods of the data storage class.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
static bool FindMatchingLocus_tagGene(CSeq_feat &f, const CGene_ref &gene_xref, CBioseq_Handle bsh)
Detects gene features with matching locus_tag.
Definition: cleanup.cpp:847
void SetDiv(const TDiv &value)
Assign a value to Div data member.
Definition: OrgName_.hpp:983
Modified on Sun Jul 24 16:18:56 2016 by modify_doxy.py rev. 506947