NCBI C++ ToolKit
unit_test_cds_fix.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /*  $Id: unit_test_cds_fix.cpp 63985 2014-08-08 14:05:11Z chenj $
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================
00025 *
00026 * Author:  Colleen Bollin, Jie Chen, NCBI
00027 *
00028 * File Description:
00029 *   Unit tests for the coding-region fix functions (objtools/edit/cds_fix.hpp) and the macro replace/suspect-rule classes.
00030 *
00031 * ===========================================================================
00032 */
00033 
00034 #include <ncbi_pch.hpp>
00035 
00036 #include "unit_test_cds_fix.hpp"
00037 
00038 #include <corelib/ncbi_system.hpp>
00039 
00040 // This macro should be defined before inclusion of test_boost.hpp in all
00041 // "*.cpp" files inside executable except one. It is like function main() for
00042 // non-Boost.Test executables is defined only in one *.cpp file - other files
00043 // should not include it. If NCBI_BOOST_NO_AUTO_TEST_MAIN will not be defined
00044 // then test_boost.hpp will define such "main()" function for tests.
00045 //
00046 // Usually if your unit tests contain only one *.cpp file you should not
00047 // care about this macro at all.
00048 //
00049 //#define NCBI_BOOST_NO_AUTO_TEST_MAIN
00050 
00051 
00052 // This header must be included before all Boost.Test headers if there are any
00053 #include <corelib/test_boost.hpp>
00054 
00055 #include <objects/biblio/Id_pat.hpp>
00056 #include <objects/biblio/Title.hpp>
00057 #include <objects/general/Object_id.hpp>
00058 #include <objects/general/Dbtag.hpp>
00059 #include <objects/general/User_object.hpp>
00060 #include <objects/macro/Simple_replace.hpp>
00061 #include <objects/macro/Replace_func.hpp>
00062 #include <objects/macro/Replace_rule.hpp>
00063 #include <objects/macro/Search_func.hpp>
00064 #include <objects/macro/String_constraint.hpp>
00065 #include <objects/macro/Suspect_rule.hpp>
00066 #include <objects/macro/Word_substitution.hpp>
00067 #include <objects/macro/Word_substitution_set.hpp>
00068 #include <objects/medline/Medline_entry.hpp>
00069 #include <objects/misc/sequence_macros.hpp>
00070 #include <objects/pub/Pub_equiv.hpp>
00071 #include <objects/pub/Pub.hpp>
00072 #include <objects/seqset/Seq_entry.hpp>
00073 #include <objects/seq/GIBB_mol.hpp>
00074 #include <objects/seq/Seq_ext.hpp>
00075 #include <objects/seq/Delta_ext.hpp>
00076 #include <objects/seq/Delta_seq.hpp>
00077 #include <objects/seq/Seq_literal.hpp>
00078 #include <objects/seq/Ref_ext.hpp>
00079 #include <objects/seq/Map_ext.hpp>
00080 #include <objects/seq/Seg_ext.hpp>
00081 #include <objects/seq/Seq_gap.hpp>
00082 #include <objects/seq/Seq_data.hpp>
00083 #include <objects/seq/Seq_descr.hpp>
00084 #include <objects/seq/Seqdesc.hpp>
00085 #include <objects/seq/MolInfo.hpp>
00086 #include <objects/seq/Pubdesc.hpp>
00087 #include <objects/seq/Seq_hist.hpp>
00088 #include <objects/seq/Seq_hist_rec.hpp>
00089 #include <objects/seq/Seq_annot.hpp>
00090 #include <objects/seqalign/Dense_seg.hpp>
00091 #include <objects/seqblock/GB_block.hpp>
00092 #include <objects/seqblock/EMBL_block.hpp>
00093 #include <objects/seqfeat/BioSource.hpp>
00094 #include <objects/seqfeat/Org_ref.hpp>
00095 #include <objects/seqfeat/OrgName.hpp>
00096 #include <objects/seqfeat/SubSource.hpp>
00097 #include <objects/seqfeat/Imp_feat.hpp>
00098 #include <objects/seqfeat/Cdregion.hpp>
00099 #include <objects/seqloc/Seq_id.hpp>
00100 #include <objects/seqloc/PDB_seq_id.hpp>
00101 #include <objects/seqloc/Giimport_id.hpp>
00102 #include <objects/seqloc/Patent_seq_id.hpp>
00103 #include <objects/seqloc/Seq_loc.hpp>
00104 #include <objects/seqloc/Seq_interval.hpp>
00105 #include <objects/valid/Comment_set.hpp>
00106 #include <objects/valid/Comment_rule.hpp>
00107 #include <objmgr/object_manager.hpp>
00108 #include <objmgr/scope.hpp>
00109 #include <objmgr/bioseq_ci.hpp>
00110 #include <objmgr/feat_ci.hpp>
00111 #include <objmgr/seq_vector.hpp>
00112 #include <objmgr/util/sequence.hpp>
00113 #include <objmgr/util/seq_loc_util.hpp>
00114 #include <objmgr/seqdesc_ci.hpp>
00115 #include <objmgr/bioseq_set_handle.hpp>
00116 #include <objects/seq/seqport_util.hpp>
00117 #include <objtools/data_loaders/genbank/gbloader.hpp>
00118 #include <objtools/unit_test_util/unit_test_util.hpp>
00119 #include <corelib/ncbiapp.hpp>
00120 
00121 #include <objtools/unit_test_util/unit_test_util.hpp>
00122 #include <objtools/edit/cds_fix.hpp>
00123 
00124 
00125 BEGIN_NCBI_SCOPE
00126 BEGIN_SCOPE(objects)
00127 
00128 
00129 
00130 
00131 
00132 NCBITEST_INIT_TREE()
00133 {
00134     if ( !CNcbiApplication::Instance()->GetConfig().HasEntry("NCBI", "Data") ) {
00135     }
00136 }
00137 
// Set to true by NCBITEST_AUTO_INIT when the "debug_mode" flag is given on
// the command line; false otherwise.
static bool s_debugMode(false);
00139 
00140 NCBITEST_INIT_CMDLINE(arg_desc)
00141 {
00142     // Here we make descriptions of command line parameters that we are
00143     // going to use.
00144 
00145     arg_desc->AddFlag( "debug_mode",
00146         "Debugging mode writes errors seen for each test" );
00147 }
00148 
00149 NCBITEST_AUTO_INIT()
00150 {
00151     // initialization function body
00152 
00153     const CArgs& args = CNcbiApplication::Instance()->GetArgs();
00154     if (args["debug_mode"]) {
00155         s_debugMode = true;
00156     }
00157 }
00158 
00159 
00160 void CheckTerminalExceptionResults (CSeq_feat& cds, CScope& scope,
00161                   bool strict, bool extend,
00162                   bool expected_rval, bool set_codebreak, 
00163                   bool set_comment, TSeqPos expected_endpoint)
00164 {
00165     const CCdregion& cdr = cds.GetData().GetCdregion();
00166     BOOST_CHECK_EQUAL(edit::SetTranslExcept(cds, 
00167                                             "TAA stop codon is completed by the addition of 3' A residues to the mRNA",
00168                                             strict, extend, scope),
00169                       expected_rval);
00170     BOOST_CHECK_EQUAL(cdr.IsSetCode_break(), set_codebreak);
00171     if (set_codebreak) {
00172         BOOST_CHECK_EQUAL(cdr.GetCode_break().size(), 1);
00173     }
00174 
00175     BOOST_CHECK_EQUAL(cds.IsSetComment(), set_comment);
00176     if (set_comment) {
00177         BOOST_CHECK_EQUAL(cds.GetComment(), "TAA stop codon is completed by the addition of 3' A residues to the mRNA");
00178     }
00179     BOOST_CHECK_EQUAL(cds.GetLocation().GetStop(eExtreme_Biological), expected_endpoint);
00180 }
00181 
00182 
00183 void OneTerminalTranslationExceptionTest(bool strict, bool extend, TSeqPos endpoint,
00184                                          const string& seq,
00185                                          bool expected_rval, bool set_codebreak, bool set_comment,                                         
00186                                          TSeqPos expected_endpoint)
00187 {
00188     CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
00189     STANDARD_SETUP
00190 
00191     CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet (entry);
00192     CCdregion& cdr = cds->SetData().SetCdregion();
00193     CBioseq& nuc_seq = entry->SetSet().SetSeq_set().front()->SetSeq();
00194     nuc_seq.SetInst().SetSeq_data().SetIupacna().Set(seq);
00195     cds->SetLocation().SetInt().SetTo(endpoint);
00196     
00197     // Should not set translation exception if coding region already has stop codon
00198     CheckTerminalExceptionResults(*cds, seh.GetScope(),
00199                                   strict, extend, expected_rval, 
00200                                   set_codebreak, set_comment, expected_endpoint);
00201 
00202     cdr.ResetCode_break();
00203     cds->ResetComment();
00204     cds->SetLocation().SetInt().SetTo(endpoint);
00205 
00206     // same results if reverse-complement
00207     scope.RemoveTopLevelSeqEntry(seh);
00208     unit_test_util::RevComp(entry);
00209     seh = scope.AddTopLevelSeqEntry(*entry);
00210     CheckTerminalExceptionResults(*cds, seh.GetScope(),
00211                                   strict, extend, expected_rval, 
00212                                   set_codebreak, set_comment, 
00213                                   nuc_seq.GetLength() - expected_endpoint - 1);
00214 }
00215 
00216 
00217 BOOST_AUTO_TEST_CASE(Test_AddTerminalTranslationException)
00218 {
00219     string original_seq = "ATGCCCAGAAAAACAGAGATAAACTAAGGGATGCCCAGAAAAACAGAGATAAACTAAGGG";
00220     // no change if normal
00221     OneTerminalTranslationExceptionTest(true, true, 26, 
00222                                         original_seq,
00223                                         false, false, false, 26);
00224 
00225     // should not set translation exception, but should extend to cover stop codon if extend is true
00226     OneTerminalTranslationExceptionTest(true, true, 23, 
00227                                         original_seq,
00228                                         true, false, false, 26);
00229 
00230     // but no change if extend flag is false
00231     OneTerminalTranslationExceptionTest(true, false, 23, 
00232                                         original_seq,
00233                                         false, false, false, 23);
00234 
00235     // should be set if last A in stop codon is replaced with other NT and coding region is one shorter
00236     string changed_seq = original_seq;
00237     changed_seq[26] = 'C';
00238     OneTerminalTranslationExceptionTest(true, true, 25, 
00239                                         changed_seq,
00240                                         true, true, true, 25);
00241 
00242     // should extend for partial stop codon and and add terminal exception if coding region missing
00243     // entire last codon
00244     OneTerminalTranslationExceptionTest(true, true, 23, 
00245                                         changed_seq,
00246                                         true, true, true, 25);
00247 
00248     // for non-strict, first NT could be N
00249     changed_seq[24] = 'N';
00250     OneTerminalTranslationExceptionTest(false, true, 25, 
00251                                         changed_seq,
00252                                         true, true, true, 25);
00253     // but not for strict
00254     OneTerminalTranslationExceptionTest(true, true, 23, 
00255                                         changed_seq,
00256                                         false, false, false, 23);
00257 
00258 
00259 }
00260 
00261 
00262 BOOST_AUTO_TEST_CASE(Test_FeaturePartialSynchronization)
00263 {
00264     CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
00265     STANDARD_SETUP
00266 
00267     CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet (entry);
00268     CRef<CSeq_entry> prot_seq = unit_test_util::GetProteinSequenceFromGoodNucProtSet (entry);
00269     CRef<CSeq_feat> prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet (entry);
00270     CRef<CSeqdesc> prot_molinfo;
00271     NON_CONST_ITERATE(CBioseq::TDescr::Tdata, it, prot_seq->SetSeq().SetDescr().Set()) {
00272         if ((*it)->IsMolinfo()) {
00273             prot_molinfo.Reset(it->GetPointer());
00274         }
00275     }
00276 
00277     // establish that everything is ok before
00278     BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStart(eExtreme_Biological), false);
00279     BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
00280     BOOST_CHECK_EQUAL(cds->IsSetPartial(), false);
00281     BOOST_CHECK_EQUAL(edit::AdjustFeaturePartialFlagForLocation(*cds), false);
00282     BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStart(eExtreme_Biological), false);
00283     BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStop(eExtreme_Biological), false);
00284     BOOST_CHECK_EQUAL(prot_feat->IsSetPartial(), false);
00285     BOOST_CHECK_EQUAL(edit::AdjustProteinFeaturePartialsToMatchCDS(*prot_feat, *cds), false);
00286     BOOST_CHECK_EQUAL(prot_molinfo->GetMolinfo().GetCompleteness(), (CMolInfo::TCompleteness)CMolInfo::eCompleteness_complete);
00287     BOOST_CHECK_EQUAL(edit::AdjustProteinMolInfoToMatchCDS(prot_molinfo->SetMolinfo(), *cds), false);
00288     BOOST_CHECK_EQUAL(edit::AdjustForCDSPartials(*cds, seh), false);
00289 
00290     cds->SetLocation().SetPartialStart(true, eExtreme_Biological);
00291     BOOST_CHECK_EQUAL(cds->IsSetPartial(), false);
00292     BOOST_CHECK_EQUAL(edit::AdjustFeaturePartialFlagForLocation(*cds), true);
00293     BOOST_CHECK_EQUAL(cds->IsSetPartial(), true);
00294             
00295     BOOST_CHECK_EQUAL(edit::AdjustProteinFeaturePartialsToMatchCDS(*prot_feat, *cds), true);
00296     BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStart(eExtreme_Biological), true);
00297     BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStop(eExtreme_Biological), false);
00298     BOOST_CHECK_EQUAL(prot_feat->IsSetPartial(), true);
00299 
00300     BOOST_CHECK_EQUAL(edit::AdjustProteinMolInfoToMatchCDS(prot_molinfo->SetMolinfo(), *cds), true);
00301     BOOST_CHECK_EQUAL(prot_molinfo->GetMolinfo().GetCompleteness(), (CMolInfo::TCompleteness)CMolInfo::eCompleteness_no_left);
00302 
00303     // all changes in one go
00304     cds->SetLocation().SetPartialStart(false, eExtreme_Biological);
00305     BOOST_CHECK_EQUAL(edit::AdjustFeaturePartialFlagForLocation(*cds), true);
00306     BOOST_CHECK_EQUAL(edit::AdjustForCDSPartials(*cds, seh), true);
00307     prot_feat = unit_test_util::GetProtFeatFromGoodNucProtSet (entry);
00308     BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStart(eExtreme_Biological), false);
00309     BOOST_CHECK_EQUAL(prot_feat->GetLocation().IsPartialStop(eExtreme_Biological), false);
00310     BOOST_CHECK_EQUAL(prot_feat->IsSetPartial(), false);
00311     BOOST_CHECK_EQUAL(prot_molinfo->GetMolinfo().GetCompleteness(), (CMolInfo::TCompleteness)CMolInfo::eCompleteness_complete);
00312 
00313 }
00314 
00315 
00316 BOOST_AUTO_TEST_CASE(Test_MakemRNAforCDS)
00317 {
00318     CRef<CSeq_entry> entry = unit_test_util::BuildGoodNucProtSet();
00319     STANDARD_SETUP
00320 
00321     CRef<CSeq_feat> cds = unit_test_util::GetCDSFromGoodNucProtSet (entry);
00322     CRef<CSeq_feat> mrna = edit::MakemRNAforCDS(*cds, scope);
00323     BOOST_CHECK_EQUAL(sequence::Compare(cds->GetLocation(), mrna->GetLocation(), &scope), sequence::eSame);
00324     BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStart(eExtreme_Biological), true);
00325     BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStop(eExtreme_Biological), true);
00326 
00327     // with a 3' UTR
00328     scope.RemoveTopLevelSeqEntry(seh);
00329     CRef<objects::CSeq_entry> nuc_seq = unit_test_util::GetNucleotideSequenceFromGoodNucProtSet (entry);
00330     CRef<CSeq_feat> utr3 = unit_test_util::AddGoodImpFeat(nuc_seq, "3'UTR");
00331     utr3->ResetComment();
00332     utr3->SetLocation().SetInt().SetFrom(27);
00333     utr3->SetLocation().SetInt().SetTo(30);
00334     seh = scope.AddTopLevelSeqEntry(*entry);
00335 
00336     mrna = edit::MakemRNAforCDS(*cds, scope);
00337     BOOST_CHECK_EQUAL(sequence::Compare(cds->GetLocation(), mrna->GetLocation(), &scope), sequence::eContained);
00338     BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStart(eExtreme_Biological), true);
00339     BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStop(eExtreme_Biological), false);
00340     BOOST_CHECK_EQUAL(mrna->GetLocation().GetStop(eExtreme_Biological), utr3->GetLocation().GetStop(eExtreme_Biological));
00341 
00342     // with a 5' UTR and a 3' UTR
00343     scope.RemoveTopLevelSeqEntry(seh);
00344     CRef<CSeq_feat> utr5 = unit_test_util::AddGoodImpFeat(nuc_seq, "5'UTR");
00345     utr5->ResetComment();
00346     utr5->SetLocation().SetInt().SetFrom(0);
00347     utr5->SetLocation().SetInt().SetTo(2);
00348     cds->SetLocation().SetInt().SetFrom(3);
00349     seh = scope.AddTopLevelSeqEntry(*entry);
00350     mrna = edit::MakemRNAforCDS(*cds, scope);
00351     BOOST_CHECK_EQUAL(sequence::Compare(cds->GetLocation(), mrna->GetLocation(), &scope), sequence::eContained);
00352     BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStart(eExtreme_Biological), false);
00353     BOOST_CHECK_EQUAL(mrna->GetLocation().IsPartialStop(eExtreme_Biological), false);
00354     BOOST_CHECK_EQUAL(mrna->GetLocation().GetStart(eExtreme_Biological), utr5->GetLocation().GetStart(eExtreme_Biological));
00355     BOOST_CHECK_EQUAL(mrna->GetLocation().GetStop(eExtreme_Biological), utr3->GetLocation().GetStop(eExtreme_Biological));
00356 
00357     scope.RemoveTopLevelSeqEntry(seh);
00358     unit_test_util::AddFeat(mrna, nuc_seq);
00359     seh = scope.AddTopLevelSeqEntry(*entry);
00360     // should not create another mRNA if one is already on the record with the right product name
00361     CRef<CSeq_feat> mrna2 = edit::MakemRNAforCDS(*cds, scope);
00362     BOOST_REQUIRE(!mrna2);
00363 
00364     // but will create if the existing mRNA has the wrong product
00365     mrna->SetData().SetRna().SetExt().SetName("abc");
00366     mrna2 = edit::MakemRNAforCDS(*cds, scope);
00367     BOOST_CHECK_EQUAL(sequence::Compare(mrna2->GetLocation(), mrna->GetLocation(), &scope), sequence::eSame);
00368 
00369 }
00370 
00371 
00372 BOOST_AUTO_TEST_CASE(Test_GetGeneticCodeForBioseq)
00373 {
00374     CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
00375     STANDARD_SETUP
00376 
00377     CBioseq_CI bi(seh, CSeq_inst::eMol_na);
00378     CRef<CGenetic_code> code = edit::GetGeneticCodeForBioseq(*bi);
00379     BOOST_REQUIRE(!code);
00380 
00381     unit_test_util::SetGcode(entry, 6);
00382     code = edit::GetGeneticCodeForBioseq(*bi);
00383     BOOST_CHECK_EQUAL(code->GetId(), 6);
00384 
00385     unit_test_util::SetGenome(entry, CBioSource::eGenome_mitochondrion);
00386     code = edit::GetGeneticCodeForBioseq(*bi);
00387     BOOST_REQUIRE(!code);
00388 
00389     unit_test_util::SetMGcode(entry, 2);
00390     code = edit::GetGeneticCodeForBioseq(*bi);
00391     BOOST_CHECK_EQUAL(code->GetId(), 2);
00392 
00393     unit_test_util::SetGenome(entry, CBioSource::eGenome_apicoplast);
00394     code = edit::GetGeneticCodeForBioseq(*bi);
00395     BOOST_CHECK_EQUAL(code->GetId(), 11);
00396 
00397     unit_test_util::SetPGcode(entry, 12);
00398     code = edit::GetGeneticCodeForBioseq(*bi);
00399     BOOST_CHECK_EQUAL(code->GetId(), 12);
00400 }
00401 
00402 
00403 BOOST_AUTO_TEST_CASE(Test_TruncateCDSAtStop)
00404 {
00405     CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
00406     entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAATAAGTAAATAATTGGCCAAAATTGGCCAAAATTGGCCAA");
00407     CRef<CSeq_feat> cds = unit_test_util::AddMiscFeature(entry, entry->GetSeq().GetInst().GetLength() - 1);
00408     cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
00409     cds->SetData().SetCdregion();
00410     STANDARD_SETUP
00411 
00412     // check for frame 1/unset
00413     bool found_stop = edit::TruncateCDSAtStop(*cds, scope);
00414     BOOST_CHECK_EQUAL(found_stop, true);
00415     BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 23);
00416     BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
00417 
00418     // check for frame 2
00419     cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_two);
00420     cds->SetLocation().SetInt().SetTo(entry->GetSeq().GetInst().GetLength() - 1);
00421     cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
00422     found_stop = edit::TruncateCDSAtStop(*cds, scope);
00423     BOOST_CHECK_EQUAL(found_stop, true);
00424     BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 27);
00425     BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
00426 
00427     // check for frame 3
00428     cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_three);
00429     cds->SetLocation().SetInt().SetTo(entry->GetSeq().GetInst().GetLength() - 1);
00430     cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
00431     found_stop = edit::TruncateCDSAtStop(*cds, scope);
00432     BOOST_CHECK_EQUAL(found_stop, true);
00433     BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 31);
00434     BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
00435 
00436 
00437 }
00438 
00439 
00440 BOOST_AUTO_TEST_CASE(Test_ExtendCDSToStopCodon)
00441 {
00442     CRef<CSeq_entry> entry = unit_test_util::BuildGoodSeq();
00443     entry->SetSeq().SetInst().SetSeq_data().SetIupacna().Set("AATTGGCCAAAATTGGCCAAATAAGTAAATAATTGGCCAAAATTGGCCAAAATTGGCCAA");
00444     CRef<CSeq_feat> cds = unit_test_util::AddMiscFeature(entry, 15);
00445     cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
00446     cds->SetData().SetCdregion();
00447     STANDARD_SETUP
00448 
00449     // check for frame 1/unset
00450     bool found_stop = edit::ExtendCDSToStopCodon(*cds, scope);
00451     BOOST_CHECK_EQUAL(found_stop, true);
00452     BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 23);
00453     BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
00454 
00455     // check for frame 2
00456     cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_two);
00457     cds->SetLocation().SetInt().SetTo(15);
00458     cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
00459     found_stop = edit::ExtendCDSToStopCodon(*cds, scope);
00460     BOOST_CHECK_EQUAL(found_stop, true);
00461     BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 27);
00462     BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
00463 
00464     // check for frame 3
00465     cds->SetData().SetCdregion().SetFrame(CCdregion::eFrame_three);
00466     cds->SetLocation().SetInt().SetTo(15);
00467     cds->SetLocation().SetPartialStop(true, eExtreme_Biological);
00468     found_stop = edit::ExtendCDSToStopCodon(*cds, scope);
00469     BOOST_CHECK_EQUAL(found_stop, true);
00470     BOOST_CHECK_EQUAL(cds->GetLocation().GetStop(eExtreme_Biological), 31);
00471     BOOST_CHECK_EQUAL(cds->GetLocation().IsPartialStop(eExtreme_Biological), false);
00472 
00473 
00474 }
00475 
00476 
00477 BOOST_AUTO_TEST_CASE(Test_MakemRNAAnnotOnly)
00478 {
00479     CRef<CSeq_feat> cds(new CSeq_feat());
00480     cds->SetData().SetCdregion();
00481     cds->SetLocation().SetInt().SetId().SetLocal().SetStr("abc");
00482     cds->SetLocation().SetInt().SetFrom(10);
00483     cds->SetLocation().SetInt().SetTo(40);
00484     CRef<CSeq_annot> annot(new CSeq_annot());
00485     annot->SetData().SetFtable().push_back(cds);
00486 
00487     CScope scope(*CObjectManager::GetInstance());
00488     scope.AddDefaults();
00489     CSeq_annot_Handle sah = scope.AddSeq_annot(*annot);
00490     CFeat_CI it(sah);
00491     while (it) {
00492         if (it->GetFeatSubtype() == CSeqFeatData::eSubtype_cdregion) {
00493             const CSeq_feat& cds = it->GetOriginalFeature();
00494             CRef<CSeq_feat> pRna = edit::MakemRNAforCDS(cds, scope); //<-- blows up on NULL ptr !!!
00495             BOOST_CHECK_EQUAL(pRna->GetLocation().GetStart(eExtreme_Biological), 10);
00496             BOOST_CHECK_EQUAL(pRna->GetLocation().GetStop(eExtreme_Biological), 40);
00497         }
00498         ++it;
00499     }
00500 
00501     scope.RemoveSeq_annot(sah);
00502     CRef<CSeq_feat> utr5(new CSeq_feat());
00503     utr5->SetData().SetImp().SetKey("5'UTR");
00504     utr5->SetLocation().SetInt().SetId().SetLocal().SetStr("abc");
00505     utr5->SetLocation().SetInt().SetFrom(0);
00506     utr5->SetLocation().SetInt().SetTo(9);
00507     annot->SetData().SetFtable().push_back(utr5);
00508     CRef<CSeq_feat> utr3(new CSeq_feat());
00509     utr3->SetData().SetImp().SetKey("3'UTR");
00510     utr3->SetLocation().SetInt().SetId().SetLocal().SetStr("abc");
00511     utr3->SetLocation().SetInt().SetFrom(41);
00512     utr3->SetLocation().SetInt().SetTo(50);
00513     annot->SetData().SetFtable().push_back(utr3);
00514 
00515     sah = scope.AddSeq_annot(*annot);
00516     CFeat_CI it2(sah);
00517     while (it2) {
00518         if (it2->GetFeatSubtype() == CSeqFeatData::eSubtype_cdregion) {
00519             const CSeq_feat& cds = it2->GetOriginalFeature();
00520             CRef<CSeq_feat> pRna = edit::MakemRNAforCDS(cds, scope); //<-- blows up on NULL ptr !!!
00521             BOOST_CHECK_EQUAL(pRna->GetLocation().GetStart(eExtreme_Biological), 0);
00522             BOOST_CHECK_EQUAL(pRna->GetLocation().GetStop(eExtreme_Biological), 50);
00523         }
00524         ++it2;
00525     }
00526 
00527     // should not make mRNA if one already exists
00528     scope.RemoveSeq_annot(sah);
00529     CRef<CSeq_feat> mrna(new CSeq_feat());
00530     mrna->SetData().SetRna().SetType(CRNA_ref::eType_mRNA);
00531     mrna->SetData().SetRna().SetExt().SetName("");
00532     mrna->SetLocation().SetInt().SetId().SetLocal().SetStr("abc");
00533     mrna->SetLocation().SetInt().SetFrom(10);
00534     mrna->SetLocation().SetInt().SetTo(40);
00535     annot->SetData().SetFtable().push_back(mrna);
00536     BOOST_CHECK_EQUAL(mrna->GetData().GetSubtype(), CSeqFeatData::eSubtype_mRNA);
00537     sah = scope.AddSeq_annot(*annot);
00538 
00539     CFeat_CI it3(sah);
00540     while (it3) {
00541         if (it3->GetFeatSubtype() == CSeqFeatData::eSubtype_cdregion) {
00542             const CSeq_feat& cds = it3->GetOriginalFeature();
00543             CRef<CSeq_feat> pRna = edit::MakemRNAforCDS(cds, scope);
00544             BOOST_REQUIRE(!pRna);
00545         }
00546         ++it3;
00547     }
00548 
00549 }
00550 
00551 
00552 BOOST_AUTO_TEST_CASE(Test_SimpleReplace)
00553 {
00554     CRef<CSimple_replace> repl(new CSimple_replace());
00555     repl->SetReplace("foo");
00556 
00557     string test = "abc";
00558 
00559     CRef<CString_constraint> constraint(NULL);
00560     BOOST_CHECK_EQUAL(repl->ApplyToString(test, constraint), true);
00561     BOOST_CHECK_EQUAL(test, "foo");
00562 
00563     test = "candidate abc";
00564     repl->SetWeasel_to_putative(true);
00565     BOOST_CHECK_EQUAL(repl->ApplyToString(test, constraint), true);
00566     BOOST_CHECK_EQUAL(test, "putative foo");
00567 }
00568 
00569 
00570 BOOST_AUTO_TEST_CASE(Test_ReplaceFunc)
00571 {
00572     CRef<CReplace_func> repl(new CReplace_func());
00573     repl->SetHaem_replace("haem");
00574 
00575     string test = "haemagglutination domain protein";
00576 
00577     CRef<CString_constraint> constraint(NULL);
00578     BOOST_CHECK_EQUAL(repl->ApplyToString(test, constraint), true);
00579     BOOST_CHECK_EQUAL(test, "hemagglutination domain protein");
00580 
00581     test = "land of the free, haem of the brave";
00582     BOOST_CHECK_EQUAL(repl->ApplyToString(test, constraint), true);
00583     BOOST_CHECK_EQUAL(test, "land of the free, heme of the brave");
00584 
00585     repl->SetSimple_replace().SetReplace("foo");
00586     test = "abc";
00587 
00588     BOOST_CHECK_EQUAL(repl->ApplyToString(test, constraint), true);
00589     BOOST_CHECK_EQUAL(test, "foo");
00590 
00591     test = "candidate abc";
00592     repl->SetSimple_replace().SetWeasel_to_putative(true);
00593     BOOST_CHECK_EQUAL(repl->ApplyToString(test, constraint), true);
00594     BOOST_CHECK_EQUAL(test, "putative foo");
00595 
00596 }
00597 
00598 
00599 BOOST_AUTO_TEST_CASE(Test_SuspectRule)
00600 {
00601     CRef<CSuspect_rule> rule(new CSuspect_rule());
00602     rule->SetFind().SetString_constraint().SetMatch_text("haem");
00603     rule->SetReplace().SetReplace_func().SetHaem_replace("haem");
00604 
00605     string test = "haemagglutination domain protein";
00606 
00607     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00608     BOOST_CHECK_EQUAL(test, "hemagglutination domain protein");
00609 
00610     test = "land of the free, haem of the brave";
00611     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00612     BOOST_CHECK_EQUAL(test, "land of the free, heme of the brave");
00613 
00614     rule->SetFind().SetString_constraint().SetMatch_text("abc");
00615     rule->SetReplace().SetReplace_func().SetSimple_replace().SetReplace("foo");
00616     rule->SetReplace().SetReplace_func().SetSimple_replace().SetWhole_string(true);
00617     test = "abc";
00618 
00619     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00620     BOOST_CHECK_EQUAL(test, "foo");
00621 
00622     test = "candidate abc";
00623     rule->SetReplace().SetReplace_func().SetSimple_replace().SetWeasel_to_putative(true);
00624     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00625     BOOST_CHECK_EQUAL(test, "putative foo");
00626 
00627     test = "do not match me";
00628     rule->SetReplace().SetReplace_func().SetSimple_replace().ResetWhole_string();
00629     rule->SetFind().SetString_constraint().SetMatch_text("me");
00630     rule->SetFind().SetString_constraint().SetMatch_location(eString_location_starts);
00631     BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
00632     BOOST_CHECK_EQUAL(test, "do not match me");
00633 
00634     rule->SetFind().SetString_constraint().SetMatch_location(eString_location_ends);
00635     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00636     BOOST_CHECK_EQUAL(test, "do not match foo");
00637 
00638     test = "me first";
00639     rule->SetFind().SetString_constraint().SetMatch_location(eString_location_starts);
00640     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00641     BOOST_CHECK_EQUAL(test, "foo first");
00642 
00643     test = "me me me me";
00644     rule->SetFind().SetString_constraint().ResetMatch_location();
00645 
00646     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00647     BOOST_CHECK_EQUAL(test, "foo foo foo foo");
00648     
00649     test = "30S ribosomal protein S12";
00650     rule->SetFind().Reset();
00651     rule->SetFind().SetString_constraint().SetMatch_location(eString_location_equals);
00652     rule->SetFind().SetString_constraint().SetMatch_text("CHC2 zinc finger");
00653     rule->SetFind().SetString_constraint().SetIgnore_weasel(true);
00654     rule->SetReplace().Reset();
00655     rule->SetReplace().SetReplace_func().SetSimple_replace().SetReplace("CHC2 zinc finger protein");
00656     rule->SetReplace().SetReplace_func().SetSimple_replace().SetWhole_string(false);
00657     rule->SetReplace().SetReplace_func().SetSimple_replace().SetWeasel_to_putative(true);
00658     BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
00659 
00660     test = "hypothetical protein";
00661     rule->SetFind().Reset();
00662     rule->SetFind().SetString_constraint().SetMatch_location(eString_location_equals);
00663     rule->SetFind().SetString_constraint().SetMatch_text("protein");
00664     rule->SetFind().SetString_constraint().SetIgnore_weasel(true);
00665     rule->SetReplace().Reset();
00666     rule->SetReplace().SetReplace_func().SetSimple_replace().SetReplace("hypothetical protein");
00667     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00668 
00669     // string_constraint with ignore-words
00670     test = "human";
00671     rule->SetFind().Reset();
00672     rule->SetFind().SetString_constraint().SetMatch_text("Homo sapiens");
00673     rule->SetFind().SetString_constraint().SetMatch_location(eString_location_equals);
00674     rule->SetFind().SetString_constraint().SetIgnore_space(true);
00675     rule->SetFind().SetString_constraint().SetIgnore_punct(true);
00676 
00677     CRef <CWord_substitution_set> word_subs(new CWord_substitution_set);
00678     rule->SetFind().SetString_constraint().SetIgnore_words(word_subs.GetObject());
00679 
00680     CRef <CWord_substitution> word_sub(new CWord_substitution);
00681     word_sub->SetWord("Homo sapiens");
00682     list <string> syns;
00683     syns.push_back("human");
00684     syns.push_back("Homo sapien");
00685     syns.push_back("Homosapiens");
00686     syns.push_back("Homo-sapiens");
00687     syns.push_back("Homo spiens");
00688     syns.push_back("Homo Sapience");
00689     syns.push_back("homosapein");
00690     syns.push_back("homosapiens");
00691     syns.push_back("homosapien");
00692     syns.push_back("homo_sapien");
00693     syns.push_back("homo_sapiens");
00694     syns.push_back("Homosipian");
00695     word_sub->SetSynonyms() = syns;
00696     rule->SetFind().SetString_constraint().SetIgnore_words().Set().push_back(word_sub);
00697 
00698     word_sub.Reset(new CWord_substitution);
00699     word_sub->SetWord("sapiens");
00700     syns.clear();
00701     syns.push_back("sapien");
00702     syns.push_back("sapeins");
00703     syns.push_back("sapein");
00704     syns.push_back("sapins");
00705     syns.push_back("sapens");
00706     syns.push_back("sapin");
00707     syns.push_back("sapen");
00708     syns.push_back("sapians");
00709     syns.push_back("sapian");
00710     syns.push_back("sapies");
00711     syns.push_back("sapie");
00712     word_sub->SetSynonyms() = syns;
00713     rule->SetFind().SetString_constraint().SetIgnore_words().Set().push_back(word_sub);
00714     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00715     test = "human";
00716     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00717     test = "human1";
00718     BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
00719     test = "Homo sapien";
00720     BOOST_CHECK_EQUAL(rule->ApplyToString(test), true);
00721     test = "Human sapien";
00722     BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
00723     test = "sapien";
00724     BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
00725 
00726     word_sub.Reset(new CWord_substitution);
00727     // all the syns won't match because of missing word_sub.Word;
00728     syns.clear();
00729     syns.push_back("fruit");     
00730     syns.push_back("apple");
00731     syns.push_back("apple, pear");
00732     syns.push_back("grape");
00733     syns.push_back("peaches");
00734     syns.push_back("peach");
00735     word_sub->SetSynonyms() = syns;
00736     rule->SetFind().SetString_constraint().SetIgnore_words().Set().push_back(word_sub);
00737     test = "fruit";
00738     BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
00739     test = "pear, apple";
00740     BOOST_CHECK_EQUAL(rule->ApplyToString(test), false);
00741 }
00742 
00743 
00744 END_SCOPE(objects)
00745 END_NCBI_SCOPE
00746 
Modified on Tue Aug 26 17:20:41 2014 by modify_doxy.py rev. 426318