NCBI C++ ToolKit
unit_test_id_mapper.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unit_test_id_mapper.cpp 73453 2016-07-07 20:58:55Z vakatov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Nathan Bouk
27  *
28  * File Description:
29  *   Unit tests for CGencollIdMapper (Map, Guess and GetSynonyms).
30  * ===========================================================================
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
40 
41 
42 #include <boost/test/output_test_stream.hpp>
43 using boost::test_tools::output_test_stream;
44 
47 
48 
49 BOOST_AUTO_TEST_SUITE(TestSuiteGencollIdMapper)
50 
51 
// Maps the local Seq-id "chr1" on assembly GCF_000001405.13 with a
// RefSeq/Public spec and expects the mapped id string "NC_000001.10".
// NOTE(review): this listing skips lines 55-56, 58 and 63 -- presumably the
// GCService/GenColl/MapSpec declarations and the remaining GetAssembly
// arguments; confirm against the repository copy.
52 BOOST_AUTO_TEST_CASE(TestCaseUcscToRefSeqMapping)
53 {
54  // Fetch Gencoll
57  GCService.GetAssembly("GCF_000001405.13",
59  )
60  );
61 
62  // Make a Spec
64  MapSpec.TypedChoice = CGC_TypedSeqId::e_Refseq;
65  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
66 
67  // Do a Map
68  CGencollIdMapper Mapper(GenColl);
69  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
70  OrigLoc->SetWhole().SetLocal().SetStr("chr1");
71  CRef<CSeq_loc> Result = Mapper.Map(*OrigLoc, MapSpec);
72 
73  // Check that Map results meet expectations
74  BOOST_CHECK_EQUAL(Result->GetId()->GetSeqIdString(true), "NC_000001.10");
75 }
76 
77 
// Round trip: maps local "chr1" to RefSeq/Public (expects "NC_000001.10"),
// then asks Guess() for the spec of the original location and maps the
// result back with it, expecting equality with the original location.
// NOTE(review): this listing skips lines 81-82, 84 and 89 -- presumably the
// GCService/GenColl/MapSpec declarations and the remaining GetAssembly
// arguments; confirm against the repository copy.
78 BOOST_AUTO_TEST_CASE(TestCaseUcscToRefSeqToUcscMapping)
79 {
80  // Fetch Gencoll
83  GCService.GetAssembly("GCF_000001405.13",
85  )
86  );
87 
88  // Make a Spec
90  MapSpec.TypedChoice = CGC_TypedSeqId::e_Refseq;
91  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
92 
93  // Do a Map
94  CGencollIdMapper Mapper(GenColl);
95  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
96  OrigLoc->SetWhole().SetLocal().SetStr("chr1");
97 
98  CRef<CSeq_loc> Mapped = Mapper.Map(*OrigLoc, MapSpec);
99 
100  // Check that Map results meet expectations
101  BOOST_CHECK_EQUAL(Mapped->GetId()->GetSeqIdString(true), "NC_000001.10");
102 
103  // Guess the original ID's spec
104  CGencollIdMapper::SIdSpec GuessSpec;
105  Mapper.Guess(*OrigLoc, GuessSpec);
106 
107  // Map back with the guessed spec
108  CRef<CSeq_loc> RoundTripped = Mapper.Map(*Mapped, GuessSpec);
109 
110  // Check that Round tripped is equal to original
111  BOOST_CHECK(RoundTripped->Equals(*OrigLoc));
112 }
113 
114 
// Maps the local id "chrUn.004.10843" on assembly GCF_000003205.2 with a
// Genbank/Gi spec and expects GI 112070986.
// NOTE(review): the visible body is identical to TestCaseUcscUnTest_Comp;
// any difference would have to be in the GetAssembly arguments on listing
// line 121, which is missing here (as is line 126, presumably the MapSpec
// declaration) -- confirm against the repository copy.
115 BOOST_AUTO_TEST_CASE(TestCaseUcscUnTest_Scaffold)
116 {
117  // Fetch Gencoll
118  CGenomicCollectionsService GCService;
119  CConstRef<CGC_Assembly> GenColl(
120  GCService.GetAssembly("GCF_000003205.2",
122  )
123  );
124 
125  // Make a Spec
127  MapSpec.TypedChoice = CGC_TypedSeqId::e_Genbank;
128  MapSpec.Alias = CGC_SeqIdAlias::e_Gi;
129 
130  // Do a Map
131  CGencollIdMapper Mapper(GenColl);
132  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
133  OrigLoc->SetWhole().SetLocal().SetStr("chrUn.004.10843");
134 
135  CRef<CSeq_loc> Result = Mapper.Map(*OrigLoc, MapSpec);
136 
137  // Check that Map results meet expectations
138  BOOST_CHECK_EQUAL(Result->GetId()->GetGi(), GI_CONST(112070986)); // AAFC03080232.1
139 }
140 
141 
// Maps the local id "chrUn.004.10843" on assembly GCF_000003205.2 with a
// Genbank/Gi spec and expects GI 112070986.
// NOTE(review): the visible body is identical to TestCaseUcscUnTest_Scaffold;
// any difference would have to be in the GetAssembly arguments on listing
// line 148, which is missing here (as is line 153, presumably the MapSpec
// declaration) -- confirm against the repository copy.
142 BOOST_AUTO_TEST_CASE(TestCaseUcscUnTest_Comp)
143 {
144  // Fetch Gencoll
145  CGenomicCollectionsService GCService;
146  CConstRef<CGC_Assembly> GenColl(
147  GCService.GetAssembly("GCF_000003205.2",
149  )
150  );
151 
152  // Make a Spec
154  MapSpec.TypedChoice = CGC_TypedSeqId::e_Genbank;
155  MapSpec.Alias = CGC_SeqIdAlias::e_Gi;
156 
157  // Do a Map
158  CGencollIdMapper Mapper(GenColl);
159  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
160  OrigLoc->SetWhole().SetLocal().SetStr("chrUn.004.10843");
161 
162  CRef<CSeq_loc> Result = Mapper.Map(*OrigLoc, MapSpec);
163 
164  // Check that Map results meet expectations
165  BOOST_CHECK_EQUAL(Result->GetId()->GetGi(), GI_CONST(112070986)); // AAFC03080232.1
166 }
167 
168 
// Maps intervals on UCSC "chrN_random" local ids of assembly GCF_000001405.12
// to RefSeq/Public ids with Role = top_level. Three cases are checked:
//   - chr1_random 500000-510000 -> NT_113872.1 57066-67066 (plus)
//   - chr6_random 65878-67000   -> NT_113898.1, same coordinates, and back
//   - chr5_random 113060-114325 -> NT_113890.1, same coordinates, and back
// The "and back" step maps the result with a spec obtained from Guess() on
// the original location and expects the original location again.
// NOTE(review): this listing skips lines 175 and 184 -- presumably one
// GetAssembly argument and the MapSpec declaration; confirm against the
// repository copy.
169 BOOST_AUTO_TEST_CASE(TestCaseUcscPseudoTest_Scaffold)
170 {
171  // Fetch Gencoll
172  CGenomicCollectionsService GCService;
173  CConstRef<CGC_Assembly> GenColl(
174  GCService.GetAssembly("GCF_000001405.12",
176  0,
177  0,
178  2048, // CGencollAccess::fAttribute_include_UCSC_pseudo_scaffolds
179  0
180  )
181  );
182 
183  // Make a Spec
185  MapSpec.TypedChoice = CGC_TypedSeqId::e_Refseq;
186  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
187  MapSpec.Role = eGC_SequenceRole_top_level;
188 
189  // Do a Map
190  CGencollIdMapper Mapper(GenColl);
191  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
192  OrigLoc->SetInt().SetId().SetLocal().SetStr("chr1_random");
193  OrigLoc->SetInt().SetFrom(500000);
194  OrigLoc->SetInt().SetTo(510000);
195 
196  CRef<CSeq_loc> Result = Mapper.Map(*OrigLoc, MapSpec);
197  BOOST_CHECK(Result.NotNull());
198 
199  CRef<CSeq_loc> Expected(new CSeq_loc());
200  Expected->SetInt().SetId().Set("NT_113872.1");
201  Expected->SetInt().SetFrom(57066);
202  Expected->SetInt().SetTo(67066);
203  Expected->SetInt().SetStrand(eNa_strand_plus);
204 
205  // Check that Map results meet expectations
206  BOOST_CHECK(Result->Equals(*Expected));
207 
208  // chr6_random 65878 67001 chr6_random:67103 12330 +
209  OrigLoc.Reset(new CSeq_loc());
210  OrigLoc->SetInt().SetId().SetLocal().SetStr("chr6_random");
211  OrigLoc->SetInt().SetFrom(65878);
212  OrigLoc->SetInt().SetTo(67000);
213  OrigLoc->SetInt().SetStrand(eNa_strand_plus);
214 
     // Guess the spec of the original location; it is used below to map back
215  CGencollIdMapper::SIdSpec GuessSpec;
216  Mapper.Guess(*OrigLoc, GuessSpec);
217 
218  Result = Mapper.Map(*OrigLoc, MapSpec);
219  BOOST_CHECK(Result.NotNull());
220 
221  Expected.Reset(new CSeq_loc());
222  Expected->SetInt().SetId().Set("NT_113898.1");
223  Expected->SetInt().SetFrom(65878);
224  Expected->SetInt().SetTo(67000);
225  Expected->SetInt().SetStrand(eNa_strand_plus);
226 
227  BOOST_CHECK(Result->Equals(*Expected));
     // Round trip: map the RefSeq result back with the guessed spec
228  Result = Mapper.Map(*Result, GuessSpec);
229  BOOST_CHECK(Result.NotNull());
230  BOOST_CHECK(OrigLoc->Equals(*Result));
231 
232  // chr5_random 113060 114326 chr5:180363135 34903 +
233  OrigLoc.Reset(new CSeq_loc());
234  OrigLoc->SetInt().SetId().SetLocal().SetStr("chr5_random");
235  OrigLoc->SetInt().SetFrom(113060);
236  OrigLoc->SetInt().SetTo(114325);
237  OrigLoc->SetInt().SetStrand(eNa_strand_plus);
238 
     // Re-guess the spec for the new original location (GuessSpec is reused)
239  Mapper.Guess(*OrigLoc, GuessSpec);
240 
241  Result = Mapper.Map(*OrigLoc, MapSpec);
242  BOOST_CHECK(Result.NotNull());
243 
244  Expected.Reset(new CSeq_loc());
245  Expected->SetInt().SetId().Set("NT_113890.1");
246  Expected->SetInt().SetFrom(113060);
247  Expected->SetInt().SetTo(114325);
248  Expected->SetInt().SetStrand(eNa_strand_plus);
249 
250  BOOST_CHECK(Result->Equals(*Expected));
     // Round trip: map the RefSeq result back with the guessed spec
251  Result = Mapper.Map(*Result, GuessSpec);
252  BOOST_CHECK(Result.NotNull());
253  BOOST_CHECK(OrigLoc->Equals(*Result));
254 
255 }
256 
257 
// Maps chr1_random 500000-510000 on assembly GCF_000001405.12 with a
// RefSeq/Public/top_level spec and expects NT_113872.1 57066-67066 (plus).
// NOTE(review): the visible body repeats the first case of
// TestCaseUcscPseudoTest_Scaffold; listing lines 264 and 273 are missing
// (presumably one GetAssembly argument and the MapSpec declaration) --
// confirm against the repository copy.
258 BOOST_AUTO_TEST_CASE(TestCaseUcscPseudoTest_Comp)
259 {
260  // Fetch Gencoll
261  CGenomicCollectionsService GCService;
262  CConstRef<CGC_Assembly> GenColl(
263  GCService.GetAssembly("GCF_000001405.12",
265  0,
266  0,
267  2048, // pseudo
268  0
269  )
270  );
271 
272  // Make a Spec
274  MapSpec.TypedChoice = CGC_TypedSeqId::e_Refseq;
275  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
276  MapSpec.Role = eGC_SequenceRole_top_level;
277 
278  // Do a Map
279  CGencollIdMapper Mapper(GenColl);
280  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
281  OrigLoc->SetInt().SetId().SetLocal().SetStr("chr1_random");
282  OrigLoc->SetInt().SetFrom(500000);
283  OrigLoc->SetInt().SetTo(510000);
284 
285  CRef<CSeq_loc> Result = Mapper.Map(*OrigLoc, MapSpec);
286 
287  CRef<CSeq_loc> Expected(new CSeq_loc());
288  Expected->SetInt().SetId().Set("NT_113872.1");
289  Expected->SetInt().SetFrom(57066);
290  Expected->SetInt().SetTo(67066);
291  Expected->SetInt().SetStrand(eNa_strand_plus);
292 
293  // Check that Map results meet expectations
294  BOOST_CHECK(Result->Equals(*Expected));
295 }
296 
297 
298 // map down test
// Maps NC_000001.10 50000000-50000001 on assembly GCF_000001405.13 with a
// Genbank/Public spec whose Role is component, and expects
// AL356789.16 56981-56982 (plus).
// NOTE(review): this listing skips lines 305 and 314 -- presumably one
// GetAssembly argument and the MapSpec declaration; confirm against the
// repository copy.
299 BOOST_AUTO_TEST_CASE(TestCaseDownMapTest)
300 {
301  // Fetch Gencoll
302  CGenomicCollectionsService GCService;
303  CConstRef<CGC_Assembly> GenColl(
304  GCService.GetAssembly("GCF_000001405.13",
306  0,
307  0,
308  2048, // pseudo
309  0
310  )
311  );
312 
313  // Make a Spec
315  MapSpec.TypedChoice = CGC_TypedSeqId::e_Genbank;
316  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
317  MapSpec.Role = eGC_SequenceRole_component;
318 
319  // Do a Map
320  CGencollIdMapper Mapper(GenColl);
321  CSeq_loc OrigLoc;
322  OrigLoc.SetInt().SetId().Set("NC_000001.10");
323  OrigLoc.SetInt().SetFrom(50000000);
324  OrigLoc.SetInt().SetTo(50000001);
325 
326  CRef<CSeq_loc> Result = Mapper.Map(OrigLoc, MapSpec);
327 
328  // Expected component level result
329  CSeq_loc Expected;
330  Expected.SetInt().SetId().Set("AL356789.16");
331  Expected.SetInt().SetFrom(56981);
332  Expected.SetInt().SetTo(56982);
333  Expected.SetInt().SetStrand(eNa_strand_plus);
334 
335  // Check that Map results meet expectations
336  BOOST_CHECK(Result->Equals(Expected));
337 }
338 
339 
340 // map down scaffold test
// Maps NC_000001.10 50000000-50000001 on assembly GCF_000001405.13 with a
// Genbank/Public spec whose Role is scaffold, and expects
// GL000006.1 19971918-19971919 (plus).
// NOTE(review): this listing skips lines 347 and 356 -- presumably one
// GetAssembly argument and the MapSpec declaration; confirm against the
// repository copy.
341 BOOST_AUTO_TEST_CASE(TestCaseDownScafMapTest)
342 {
343  // Fetch Gencoll
344  CGenomicCollectionsService GCService;
345  CConstRef<CGC_Assembly> GenColl(
346  GCService.GetAssembly("GCF_000001405.13",
348  0,
349  0,
350  2048, // pseudo
351  0
352  )
353  );
354 
355  // Make a Spec
357  MapSpec.TypedChoice = CGC_TypedSeqId::e_Genbank;
358  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
359  MapSpec.Role = eGC_SequenceRole_scaffold;
360 
361  // Do a Map
362  CGencollIdMapper Mapper(GenColl);
363  CSeq_loc OrigLoc;
364  OrigLoc.SetInt().SetId().Set("NC_000001.10");
365  OrigLoc.SetInt().SetFrom(50000000);
366  OrigLoc.SetInt().SetTo(50000001);
367 
368  CRef<CSeq_loc> Result = Mapper.Map(OrigLoc, MapSpec);
369 
370  // Expected scaffold level result (the spec's Role is scaffold)
371  CSeq_loc Expected;
372  Expected.SetInt().SetId().Set("GL000006.1");
373  Expected.SetInt().SetFrom(19971918);
374  Expected.SetInt().SetTo(19971919);
375  Expected.SetInt().SetStrand(eNa_strand_plus);
376 
377  // Check that Map results meet expectations
378  BOOST_CHECK(Result->Equals(Expected));
379 }
380 
381 // upmap test (RefSeq assembly)
// Maps the local id "AL451051.6" 5000-5001 on assembly GCF_000001405.13 with
// a Genbank/Public spec whose Role is top_level, and expects
// CM000663.1 100236283-100236284. No strand is set on either location.
// NOTE(review): this listing skips lines 388 and 397 -- presumably one
// GetAssembly argument and the MapSpec declaration; confirm against the
// repository copy.
382 BOOST_AUTO_TEST_CASE(TestCaseUpMapTest_RefSeqAssm)
383 {
384  // Fetch Gencoll
385  CGenomicCollectionsService GCService;
386  CConstRef<CGC_Assembly> GenColl(
387  GCService.GetAssembly("GCF_000001405.13",
389  0,
390  0,
391  2048, // pseudo
392  0
393  )
394  );
395 
396  // Make a Spec
398  MapSpec.TypedChoice = CGC_TypedSeqId::e_Genbank;
399  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
400  MapSpec.Role = eGC_SequenceRole_top_level;
401 
402  // Do a Map
403  CGencollIdMapper Mapper(GenColl);
404  CSeq_loc OrigLoc;
405  CSeq_interval& orig_ival = OrigLoc.SetInt();
     // The accession is deliberately passed in as a local-type Seq-id
406  orig_ival.SetId().Set(CSeq_id::e_Local, "AL451051.6");
407  orig_ival.SetFrom(5000);
408  orig_ival.SetTo(5001);
409 
410  CConstRef<CSeq_loc> Result = Mapper.Map(OrigLoc, MapSpec);
411 
412  // Expected top level result (the spec's Role is top_level)
413  CSeq_loc Expected;
414  CSeq_interval& exp_ival = Expected.SetInt();
415  exp_ival.SetId().Set("CM000663.1");
416  exp_ival.SetFrom(100236283);
417  exp_ival.SetTo(100236284);
418 
419  // Check that Map results meet expectations
420  BOOST_CHECK(Result->Equals(Expected));
421 }
422 
423 
424 // upmap test (GenBank assembly)
// Same inputs and expectation as TestCaseUpMapTest_RefSeqAssm, but run
// against assembly GCA_000001405.1: local id "AL451051.6" 5000-5001 with a
// Genbank/Public/top_level spec is expected to map to
// CM000663.1 100236283-100236284.
// NOTE(review): this listing skips lines 431 and 440 -- presumably one
// GetAssembly argument and the MapSpec declaration; confirm against the
// repository copy.
425 BOOST_AUTO_TEST_CASE(TestCaseUpMapTest_GenBankAssm)
426 {
427  // Fetch Gencoll
428  CGenomicCollectionsService GCService;
429  CConstRef<CGC_Assembly> GenColl(
430  GCService.GetAssembly("GCA_000001405.1",
432  0,
433  0,
434  2048, // pseudo
435  0
436  )
437  );
438 
439  // Make a Spec
441  MapSpec.TypedChoice = CGC_TypedSeqId::e_Genbank;
442  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
443  MapSpec.Role = eGC_SequenceRole_top_level;
444 
445  // Do a Map
446  CGencollIdMapper Mapper(GenColl);
447  CSeq_loc OrigLoc;
448  CSeq_interval& orig_ival = OrigLoc.SetInt();
449  orig_ival.SetId().Set(CSeq_id::e_Local, "AL451051.6");
450  orig_ival.SetFrom(5000);
451  orig_ival.SetTo(5001);
452 
453  CConstRef<CSeq_loc> Result = Mapper.Map(OrigLoc, MapSpec);
454 
455  // Expected top level result (the spec's Role is top_level)
456  CSeq_loc Expected;
457  CSeq_interval& exp_ival = Expected.SetInt();
458  exp_ival.SetId().Set("CM000663.1");
459  exp_ival.SetFrom(100236283);
460  exp_ival.SetTo(100236284);
461 
462  // Check that Map results meet expectations
463  BOOST_CHECK(Result->Equals(Expected));
464 }
465 
466 
467 // upmap scaffold test
// Maps AL451051.6 5000-5001 on assembly GCF_000001405.13 with a
// Genbank/Public spec whose Role is scaffold, and expects
// GL000006.1 70208201-70208202 (plus).
// NOTE(review): this listing skips lines 474 and 483 -- presumably one
// GetAssembly argument and the MapSpec declaration; confirm against the
// repository copy.
468 BOOST_AUTO_TEST_CASE(TestCaseUpMapScaffoldTest)
469 {
470  // Fetch Gencoll
471  CGenomicCollectionsService GCService;
472  CConstRef<CGC_Assembly> GenColl(
473  GCService.GetAssembly("GCF_000001405.13",
475  0,
476  0,
477  2048, // pseudo
478  0
479  )
480  );
481 
482  // Make a Spec
484  MapSpec.TypedChoice = CGC_TypedSeqId::e_Genbank;
485  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
486  MapSpec.Role = eGC_SequenceRole_scaffold;
487 
488  // Do a Map
489  CGencollIdMapper Mapper(GenColl);
490  CSeq_loc OrigLoc;
491  OrigLoc.SetInt().SetId().Set("AL451051.6");
492  OrigLoc.SetInt().SetFrom(5000);
493  OrigLoc.SetInt().SetTo(5001);
494 
495  CConstRef<CSeq_loc> Result = Mapper.Map(OrigLoc, MapSpec);
496 
497  // Expected scaffold level result (the spec's Role is scaffold)
498  CSeq_loc Expected;
499  Expected.SetInt().SetId().Set("GL000006.1");
500  Expected.SetInt().SetFrom(70208201);
501  Expected.SetInt().SetTo(70208202);
502  Expected.SetInt().SetStrand(eNa_strand_plus);
503 
504  // Check that Map results meet expectations
505  BOOST_CHECK(Result->Equals(Expected));
506 }
507 
508 
509 // pattern test
// Maps the local ids "1", "C1", "LG1" and "WAKKAWAKKA1" on assembly
// GCF_000001405.13 with a RefSeq/Public spec; every one of them is expected
// to map to "NC_000001.10".
// NOTE(review): this listing skips lines 516 and 521 -- presumably the
// remaining GetAssembly arguments and the MapSpec declaration; confirm
// against the repository copy.
510 BOOST_AUTO_TEST_CASE(TestCasePatternMapping)
511 {
512  // Fetch Gencoll
513  CGenomicCollectionsService GCService;
514  CConstRef<CGC_Assembly> GenColl(
515  GCService.GetAssembly("GCF_000001405.13",
517  )
518  );
519 
520  // Make a Spec
522  MapSpec.TypedChoice = CGC_TypedSeqId::e_Refseq;
523  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
524 
525  // Do a Map
526  CGencollIdMapper Mapper(GenColl);
527  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
528  OrigLoc->SetWhole().SetLocal().SetStr("1");
529  CConstRef<CSeq_loc> Result = Mapper.Map(*OrigLoc, MapSpec);
530  // Check that Map results meet expectations
531  BOOST_CHECK_EQUAL(Result->GetId()->GetSeqIdString(true), "NC_000001.10");
532 
533 
     // Same chromosome number behind a "C" prefix
534  OrigLoc->SetWhole().SetLocal().SetStr("C1");
535  Result = Mapper.Map(*OrigLoc, MapSpec);
536  BOOST_CHECK_EQUAL(Result->GetId()->GetSeqIdString(true), "NC_000001.10");
537 
538 
     // Same chromosome number behind an "LG" prefix
539  OrigLoc->SetWhole().SetLocal().SetStr("LG1");
540  Result = Mapper.Map(*OrigLoc, MapSpec);
541  BOOST_CHECK_EQUAL(Result->GetId()->GetSeqIdString(true), "NC_000001.10");
542 
543 
     // Same chromosome number behind an arbitrary-looking prefix
544  OrigLoc->SetWhole().SetLocal().SetStr("WAKKAWAKKA1");
545  Result = Mapper.Map(*OrigLoc, MapSpec);
546  BOOST_CHECK_EQUAL(Result->GetId()->GetSeqIdString(true), "NC_000001.10");
547 }
548 
549 
550 // Up/Down, Round Trip, Pattern test
// Maps the local id "LG2" 123456789-123456798 (plus) on assembly
// GCF_000001405.13 with a RefSeq/Public/scaffold spec and expects
// NT_022135.16 13205452-13205461 (plus). It then checks the spec string
// that Guess() reports for the original location and maps the result back
// with that spec, expecting the original location.
// NOTE(review): this listing skips lines 557 and 562 -- presumably the
// remaining GetAssembly arguments and the MapSpec declaration; confirm
// against the repository copy.
551 BOOST_AUTO_TEST_CASE(TestCaseEverythingTest)
552 {
553  // Fetch Gencoll
554  CGenomicCollectionsService GCService;
555  CConstRef<CGC_Assembly> GenColl(
556  GCService.GetAssembly("GCF_000001405.13",
558  )
559  );
560 
561  // Make a Spec
563  MapSpec.TypedChoice = CGC_TypedSeqId::e_Refseq;
564  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
565  MapSpec.Role = eGC_SequenceRole_scaffold;
566 
567  // Do a Map
568  CGencollIdMapper Mapper(GenColl);
569  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
570  OrigLoc->SetInt().SetId().SetLocal().SetStr("LG2");
571  OrigLoc->SetInt().SetFrom(123456789);
572  OrigLoc->SetInt().SetTo(123456798);
573  OrigLoc->SetInt().SetStrand(eNa_strand_plus);
574 
575  CRef<CSeq_loc> Result = Mapper.Map(*OrigLoc, MapSpec);
576 
577  CRef<CSeq_loc> Expected(new CSeq_loc());
578  Expected->SetInt().SetId().Set("NT_022135.16");
579  Expected->SetInt().SetFrom(13205452);
580  Expected->SetInt().SetTo(13205461);
581  Expected->SetInt().SetStrand(eNa_strand_plus);
582 
583  // Check that Map results meet expectations
584  BOOST_CHECK(Result->Equals(*Expected));
585 
     // Guess the spec of the original "LG2" location and pin its string form
586  CGencollIdMapper::SIdSpec GuessSpec;
587  Mapper.Guess(*OrigLoc, GuessSpec);
588  BOOST_CHECK_EQUAL(GuessSpec.ToString(), "NotPrim:Private:NotSet::LG%s:CHRO:TOP");
589 
590  CRef<CSeq_loc> RoundTrip = Mapper.Map(*Result, GuessSpec);
591 
592  // Check that the round-tripped location equals the original
593  BOOST_CHECK(RoundTrip->Equals(*OrigLoc));
594 }
595 
596 
597 // primary map test
// Maps the same local id "1" with the same spec (Primary = true) against two
// assemblies and expects a different GI from each:
//   GCF_000001405.13 -> GI 224589800
//   GCA_000001405.1  -> GI 224384768
// NOTE(review): this listing skips lines 601, 615 and 630 -- presumably the
// MapSpec declaration and the remaining GetAssembly arguments; confirm
// against the repository copy.
598 BOOST_AUTO_TEST_CASE(TestCaseSpecPrimaryMap)
599 {
600  // Make a Spec
602  MapSpec.Primary = true;
603 
604  // Simple ID
605  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
606  OrigLoc->SetWhole().SetLocal().SetStr("1");
607 
608 
609  // Map both RefSeq and Genbank assemblies with the same spec and id
610  CGenomicCollectionsService GCService;
611 
612  {{
613  CConstRef<CGC_Assembly> GenColl(
614  GCService.GetAssembly("GCF_000001405.13",
616  )
617  );
618 
619  // Do a Map
620  CGencollIdMapper Mapper(GenColl);
621  CConstRef<CSeq_loc> Result = Mapper.Map(*OrigLoc, MapSpec);
622  // Check that Map results meet expectations
623  BOOST_CHECK_EQUAL(Result->GetId()->GetGi(), GI_CONST(224589800));
624  }}
625 
626 
627  {{
628  CConstRef<CGC_Assembly> GenColl(
629  GCService.GetAssembly("GCA_000001405.1",
631  )
632  );
633 
634  // Do a Map
635  CGencollIdMapper Mapper(GenColl);
636  CConstRef<CSeq_loc> Result = Mapper.Map(*OrigLoc, MapSpec);
637  // Check that Map results meet expectations
638  BOOST_CHECK_EQUAL(Result->GetId()->GetGi(), GI_CONST(224384768));
639  }}
640 
641 }
642 
643 
644 // primary guess test
// Calls Guess() on the same location (whole GI 224589800) against two
// assemblies; both calls must return true, with these spec strings:
//   GCF_000001405.13 -> "Prim:RefSeq:Gi:::CHRO:TOP"
//   GCA_000001405.1  -> "NotPrim:RefSeq:Gi:::CHRO:TOP"
// NOTE(review): this listing skips lines 658 and 675 -- presumably the
// remaining GetAssembly arguments; confirm against the repository copy.
645 BOOST_AUTO_TEST_CASE(TestCaseSpecPrimaryGuess)
646 {
647  // Simple ID
648  CRef<CSeq_loc> OrigLoc(new CSeq_loc());
649  OrigLoc->SetWhole().SetGi(GI_CONST(224589800));
650 
651 
652  // Guess against both RefSeq and Genbank assemblies with the same id
653  CGenomicCollectionsService GCService;
654 
655  {{
656  CConstRef<CGC_Assembly> GenColl(
657  GCService.GetAssembly("GCF_000001405.13",
659  )
660  );
661 
662  // Do a Guess
663  CGencollIdMapper Mapper(GenColl);
664  CGencollIdMapper::SIdSpec GuessSpec;
665  bool Result = Mapper.Guess(*OrigLoc, GuessSpec);
666  // Check that Guess results meet expectations
667  BOOST_CHECK_EQUAL(Result, true);
668  BOOST_CHECK_EQUAL(GuessSpec.ToString(), "Prim:RefSeq:Gi:::CHRO:TOP");
669  }}
670 
671 
672  {{
673  CConstRef<CGC_Assembly> GenColl(
674  GCService.GetAssembly("GCA_000001405.1",
676  )
677  );
678 
679  // Do a Guess
680  CGencollIdMapper Mapper(GenColl);
681  CGencollIdMapper::SIdSpec GuessSpec;
682  bool Result = Mapper.Guess(*OrigLoc, GuessSpec);
683  // Check that Guess results meet expectations
684  BOOST_CHECK_EQUAL(Result, true);
685  BOOST_CHECK_EQUAL(GuessSpec.ToString(), "NotPrim:RefSeq:Gi:::CHRO:TOP");
686  }}
687 
688 }
689 
690 // GetSynonyms test
// Collects the synonyms of the local id "chr1" on assembly GCF_000001405.13:
// the two-argument GetSynonyms call is expected to yield 5 ids, and the call
// with a third argument of false is expected to yield 7.
// NOTE(review): this listing skips line 697 -- presumably the remaining
// GetAssembly arguments; confirm against the repository copy.
691 BOOST_AUTO_TEST_CASE(TestCaseGetSynonyms)
692 {
693  // Fetch Gencoll
694  CGenomicCollectionsService GCService;
695  CConstRef<CGC_Assembly> GenColl(
696  GCService.GetAssembly("GCF_000001405.13",
698  )
699  );
700 
701 
702  // Do a GetSynonyms
703  CGencollIdMapper Mapper(GenColl);
704  CRef<CSeq_id> OrigId(new CSeq_id());
705  OrigId->SetLocal().SetStr("chr1");
706 
707  list<CConstRef<CSeq_id> > Synonyms;
708 
709 
710  // Check default NCBI-Only synonyms
711  Mapper.GetSynonyms(*OrigId, Synonyms);
712 
713  BOOST_CHECK_EQUAL(Synonyms.size(), (size_t)5);
714 
715 
716  // Check all synonyms (list is cleared first so the counts are independent)
717  Synonyms.clear();
718  Mapper.GetSynonyms(*OrigId, Synonyms, false);
719 
720  BOOST_CHECK_EQUAL(Synonyms.size(), (size_t)7);
721 }
722 
723 
724 // don't actually upmap test
// Maps gi|224515577 3478538-3478538 (plus) on assembly GCF_000001405.25 with
// a RefSeq/Public/top_level spec and expects NT_113891.2 at the same
// coordinates, i.e. the coordinates are not changed by the mapping.
// NOTE(review): this listing skips lines 731 and 740 -- presumably one
// GetAssembly argument and the MapSpec declaration; confirm against the
// repository copy.
725 BOOST_AUTO_TEST_CASE(TestCaseUpMapTest_AltsDontUpmap)
726 {
727  // Fetch Gencoll
728  CGenomicCollectionsService GCService;
729  CConstRef<CGC_Assembly> GenColl(
730  GCService.GetAssembly("GCF_000001405.25",
732  0,
733  0,
734  2048, // pseudo
735  0
736  )
737  );
738 
739  // Make a Spec
741  MapSpec.TypedChoice = CGC_TypedSeqId::e_Refseq;
742  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
743  MapSpec.Role = eGC_SequenceRole_top_level;
744 
745  // Do a Map
746  CGencollIdMapper Mapper(GenColl);
747  CSeq_loc OrigLoc;
748  CSeq_interval& orig_ival = OrigLoc.SetInt();
749  orig_ival.SetId().Set("gi|224515577");
750  orig_ival.SetFrom(3478538);
751  orig_ival.SetTo(3478538);
752  orig_ival.SetStrand(eNa_strand_plus);
753 
754  CConstRef<CSeq_loc> Result = Mapper.Map(OrigLoc, MapSpec);
755 
     // Expected: same single-base interval, reported on NT_113891.2
756  CSeq_loc ExpectLoc;
757  CSeq_interval& expect_ival = ExpectLoc.SetInt();
758  expect_ival.SetId().Set("NT_113891.2");
759  expect_ival.SetFrom(3478538);
760  expect_ival.SetTo(3478538);
761  expect_ival.SetStrand(eNa_strand_plus);
762 
763  // Check that Map results meet expectations
764  BOOST_CHECK(Result->Equals(ExpectLoc));
765 }
766 
767 // guess top over scaffold
// Uses two mappers: one on assembly GCF_000001405.11 ("old") and one on
// GCF_000001405.25 ("new"). The spec is guessed by the old mapper from
// NT_078074.1 95619-146371 (plus); the new mapper then maps
// gi|224589821 40301888-40507252 (plus) with that guessed spec and is
// expected to return NC_000009.11 at the same coordinates.
// NOTE(review): this listing skips lines 774 and 784 -- presumably one
// GetAssembly argument in each call; confirm against the repository copy.
768 BOOST_AUTO_TEST_CASE(TestCaseUpMapTest_GuessTopOverScaffold)
769 {
770  // Fetch Gencoll
771  CGenomicCollectionsService GCService;
772  CConstRef<CGC_Assembly> OldGenColl(
773  GCService.GetAssembly("GCF_000001405.11",
775  0,
776  0,
777  2048, // pseudo
778  0
779  )
780  );
781 
782  CConstRef<CGC_Assembly> NewGenColl(
783  GCService.GetAssembly("GCF_000001405.25",
785  0,
786  0,
787  2048, // pseudo
788  0
789  )
790  );
791 
792 
793  CGencollIdMapper OldMapper(OldGenColl);
794  CGencollIdMapper NewMapper(NewGenColl);
795 
796 
     // Location used only to derive GuessSpec from the old assembly
797  CSeq_loc OrigLoc;
798  CSeq_interval& orig_ival = OrigLoc.SetInt();
799  orig_ival.SetId().Set("NT_078074.1");
800  orig_ival.SetFrom(95619);
801  orig_ival.SetTo(146371);
802  orig_ival.SetStrand(eNa_strand_plus);
803 
804  CGencollIdMapper::SIdSpec GuessSpec;
805  OldMapper.Guess(OrigLoc, GuessSpec);
806 
     // Location that is actually mapped, on the new assembly
807  CSeq_loc RemapLoc;
808  CSeq_interval& remap_ival = RemapLoc.SetInt();
809  remap_ival.SetId().Set("gi|224589821");
810  remap_ival.SetFrom(40301888);
811  remap_ival.SetTo(40507252);
812  remap_ival.SetStrand(eNa_strand_plus);
813 
814  // Do a Map
815 
816  CConstRef<CSeq_loc> Result = NewMapper.Map(RemapLoc, GuessSpec);
817 
818 
819  CSeq_loc ExpectLoc;
820  CSeq_interval& expect_ival = ExpectLoc.SetInt();
821  expect_ival.SetId().Set("NC_000009.11");
822  expect_ival.SetFrom(40301888);
823  expect_ival.SetTo(40507252);
824  expect_ival.SetStrand(eNa_strand_plus);
825 
826  // Check that Map results meet expectations
827  BOOST_CHECK(Result->Equals(ExpectLoc));
828 }
829 
830 
831 
832 // Don't match partial chromosome names
// Maps the local ids "23-499", "333" and "425" on assembly GCF_000001405.13
// with a Genbank/Public/component spec; Map is expected to return a null
// reference for each of them.
// NOTE(review): this listing skips lines 839 and 844 -- presumably the
// remaining GetAssembly arguments and the MapSpec declaration; confirm
// against the repository copy.
833 BOOST_AUTO_TEST_CASE(TestCasePartialChromosomeTest)
834 {
835  // Fetch Gencoll
836  CGenomicCollectionsService GCService;
837  CConstRef<CGC_Assembly> GenColl(
838  GCService.GetAssembly("GCF_000001405.13",
840  )
841  );
842 
843  // Make a Spec
845  MapSpec.TypedChoice = CGC_TypedSeqId::e_Genbank;
846  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
847  MapSpec.Role = eGC_SequenceRole_component;
848 
849  // Do a Map
850  CGencollIdMapper Mapper(GenColl);
851 
852  CSeq_loc OrigLoc;
853  OrigLoc.SetInt().SetId().SetLocal().SetStr("23-499");
854  OrigLoc.SetInt().SetFrom(50000000);
855  OrigLoc.SetInt().SetTo(50000001);
856 
857  CRef<CSeq_loc> Result = Mapper.Map(OrigLoc, MapSpec);
858  BOOST_CHECK(Result.IsNull());
859 
     // Only the id string changes below; the interval coordinates are reused
860  OrigLoc.SetInt().SetId().SetLocal().SetStr("333");
861  Result = Mapper.Map(OrigLoc, MapSpec);
862  BOOST_CHECK(Result.IsNull());
863 
864  OrigLoc.SetInt().SetId().SetLocal().SetStr("425");
865  Result = Mapper.Map(OrigLoc, MapSpec);
866  BOOST_CHECK(Result.IsNull());
867 
868 }
869 
870 
871 // Fix-up PDB Seq-ids
// Maps a PDB-type Seq-id (mol "2LHe", chain 116) 1322833-1322945 on assembly
// GCF_000001215.2 with a RefSeq/Public/top_level spec; the result must be
// non-null and its id string must be "NW_001848855.1".
// NOTE(review): this listing skips lines 878 and 881, so the end of the
// GetAssembly call and presumably the MapSpec declaration are not visible;
// confirm against the repository copy.
872 BOOST_AUTO_TEST_CASE(TestCase_PDBSeqIdFix)
873 {
874  // Fetch Gencoll
875  CGenomicCollectionsService GCService;
876  CConstRef<CGC_Assembly> GenColl(
877  GCService.GetAssembly("GCF_000001215.2",
879 
880  // Make a Spec
882  MapSpec.TypedChoice = CGC_TypedSeqId::e_Refseq;
883  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
884  MapSpec.Role = eGC_SequenceRole_top_level;
885 
886  // Do a Map
887  CGencollIdMapper Mapper(GenColl);
888 
889  // This PDB Seq-id is a mis-read of "local.str = 2LHet"
890  CSeq_loc OrigLoc;
891  OrigLoc.SetInt().SetId().SetPdb().SetMol().Set("2LHe");
     // 116 is the ASCII code of 't', the last character of "2LHet"
892  OrigLoc.SetInt().SetId().SetPdb().SetChain(116);
893  OrigLoc.SetInt().SetFrom(1322833);
894  OrigLoc.SetInt().SetTo(1322945);
895 
896  CRef<CSeq_loc> Result = Mapper.Map(OrigLoc, MapSpec);
897  BOOST_CHECK(Result.NotNull());
898  BOOST_CHECK_EQUAL(Result->GetInt().GetId().GetSeqIdString(true), "NW_001848855.1");
899 }
900 
901 
902 
903 // Fix-up GI Seq-ids that should be strings
// Maps a GI-type Seq-id (397912605) 1-41937 on assembly GCF_000307585.1 with
// a RefSeq/Public/top_level spec; the result must be non-null and its id
// string must be "NC_018264.1".
// NOTE(review): this listing skips lines 910 and 913, so the end of the
// GetAssembly call and presumably the MapSpec declaration are not visible;
// confirm against the repository copy.
904 BOOST_AUTO_TEST_CASE(TestCase_GINumberString)
905 {
906  // Fetch Gencoll
907  CGenomicCollectionsService GCService;
908  CConstRef<CGC_Assembly> GenColl(
909  GCService.GetAssembly("GCF_000307585.1",
911 
912  // Make a Spec
914  MapSpec.TypedChoice = CGC_TypedSeqId::e_Refseq;
915  MapSpec.Alias = CGC_SeqIdAlias::e_Public;
916  MapSpec.Role = eGC_SequenceRole_top_level;
917 
918  // Do a Map
919  CGencollIdMapper Mapper(GenColl);
920 
921  // This GI Seq-id is presumably a mis-read of "local.str = 397912605" (TODO confirm)
922  CSeq_loc OrigLoc;
923  OrigLoc.SetInt().SetId().SetGi(GI_CONST(397912605));
924  OrigLoc.SetInt().SetFrom(1);
925  OrigLoc.SetInt().SetTo(41937);
926 
927  CRef<CSeq_loc> Result = Mapper.Map(OrigLoc, MapSpec);
928  BOOST_CHECK(Result.NotNull());
929  BOOST_CHECK_EQUAL(Result->GetInt().GetId().GetSeqIdString(true), "NC_018264.1");
930 }
BOOST_AUTO_TEST_CASE(TestCaseUcscToRefSeqMapping)
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:718
void SetTo(TTo value)
Assign a value to To data member.
void SetFrom(TFrom value)
Assign a value to From data member.
USING_SCOPE(objects)
void SetStrand(TStrand value)
Assign a value to Strand data member.
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:964
BOOST_AUTO_TEST_SUITE_END()
Auxiliary class to convert a string into an argument count and vector.
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1118
USING_NCBI_SCOPE
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
Definition: Seq_id.cpp:1465
bool NotNull(void) const THROWS_NONE
Check if pointer is not null – same effect as NotEmpty().
Definition: ncbiobj.hpp:727
Utility stuff for more convenient using of Boost.Test library.
void SetId(TId &value)
Assign a value to Id data member.
bool Guess(const objects::CSeq_loc &Loc, SIdSpec &Spec) const
Definition: id_mapper.cpp:97
CRef< CGC_Assembly > GetAssembly(const string &acc, int level=CGCClient_GetAssemblyRequest::eLevel_scaffold, int asmAttrFlags=eGCClient_AttributeFlags_none, int chrAttrFlags=eGCClient_AttributeFlags_biosource, int scafAttrFlags=eGCClient_AttributeFlags_none, int compAttrFlags=eGCClient_AttributeFlags_none)
string ToString(void) const
Definition: id_mapper.cpp:1718
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
TStr & SetStr(void)
Select the variant.
Definition: Object_id_.hpp:304
void SetInt(TInt &v)
Definition: Seq_loc.hpp:965
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:923
User-defined methods of the data storage class.
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
TLocal & SetLocal(void)
Select the variant.
Definition: Seq_id_.cpp:199
const TId & GetId(void) const
Get the Id member data.
CRef< objects::CSeq_loc > Map(const objects::CSeq_loc &Loc, const SIdSpec &Spec) const
Definition: id_mapper.cpp:136
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:756
Modified on Thu Jul 28 20:11:02 2016 by modify_doxy.py rev. 506947