NCBI C++ ToolKit
blast_test_util.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_test_util.cpp 50231 2011-06-28 17:45:59Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_test_util.cpp
31  * Utilities to develop and debug unit tests for BLAST
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include "blast_test_util.hpp"
36 #include <corelib/ncbimisc.hpp>
37 #include <corelib/ncbitype.h>
38 #include <util/random_gen.hpp>
39 
40 // Serialization includes
41 #include <serial/serial.hpp>
42 #include <serial/objistr.hpp>
43 
44 // Object manager includes
45 #include <objmgr/bioseq_handle.hpp>
46 #include <objmgr/seq_vector.hpp>
50 
51 // Object includes
53 
54 // Formatter includes
56 
57 #include <sstream>
58 
59 #define NCBI_BOOST_NO_AUTO_TEST_MAIN
60 #include <corelib/test_boost.hpp>
61 
62 using namespace std;
63 using namespace ncbi;
64 using namespace ncbi::objects;
65 using namespace ncbi::align_format;
66 
67 namespace TestUtil {
68 
69 objects::CSeq_id* GenerateRandomSeqid_Gi()
70 {
71  static CRandom random_gen(static_cast<CRandom::TValue>(time(0)));
72  return new CSeq_id(CSeq_id::e_Gi, random_gen.GetRand(1, 20000000));
73 }
74 
77 {
79 
81  ASSERT((*i)->GetSegs().IsDisc());
82 
83  ITERATE(CSeq_align::C_Segs::TDisc::Tdata, hsp_itr,
84  (*i)->GetSegs().GetDisc().Get()) {
85  retval->Set().push_back((*hsp_itr));
86  }
87  }
88 
89  return retval;
90 }
91 
93  const CSeq_align_set* sas,
94  CScope& scope)
95 {
96  ASSERT(sas);
97 
98  int align_opt = CDisplaySeqalign::eShowMiddleLine |
102 
104 
105  CDisplaySeqalign formatter(*saset, scope);
106  formatter.SetAlignOption(align_opt);
107  formatter.DisplaySeqalign(out);
108 }
109 
110 namespace {
111  union SUnion14 {
112  char end_bytes[4];
114  };
115 };
116 
117 Uint4
118 EndianIndependentBufferHash(const char * buffer,
119  Uint4 byte_length,
120  Uint4 swap_size,
121  Uint4 hash_seed)
122 {
123  Uint4 hash = hash_seed;
124  Uint4 swap_mask = swap_size - 1;
125 
126  // Check that swapsize is a power of two.
127  _ASSERT((swap_size) && (0 == (swap_mask & swap_size)));
128 
129  // Insure that the byte_length is a multiple of swap_size
130  _ASSERT((byte_length & swap_mask) == 0);
131 
132  SUnion14 swap_test;
133  swap_test.end_bytes[0] = 0x44;
134  swap_test.end_bytes[1] = 0x33;
135  swap_test.end_bytes[2] = 0x22;
136  swap_test.end_bytes[3] = 0x11;
137  Uint4 end_value = swap_test.end_value;
138 
139  if (end_value == 0x11223344) {
140  // Prevent actual swapping on little endian machinery.
141  swap_size = 1;
142  swap_mask = 0;
143  }
144 
145  Uint4 keep_mask = ~ swap_mask;
146 
147  // Logical address is the address if the data was little endian.
148 
149  for(Uint4 logical = 0; logical < byte_length; logical++) {
150  Uint4 physical =
151  (logical & keep_mask) | (swap_mask - (logical & swap_mask));
152 
153  // Alternate addition and XOR. This technique destroys most
154  // of the possible mathematical relationships between similar
155  // input strings.
156 
157  if (logical & 1) {
158  hash += int(buffer[physical]) & 0xFF;
159  } else {
160  hash ^= int(buffer[physical]) & 0xFF;
161  }
162 
163  // 1. "Rotate" by a value relatively prime to 32 (any odd
164  // value), to insure that each input bit will eventually
165  // affect each position.
166  // 2. Add a per-iteration constant to detect changes in length.
167 
168  hash = ((hash << 13) | (hash >> 19)) + 1234;
169  }
170 
171  return hash;
172 }
173 
174 CBlastOM::CBlastOM(const string& dbname, EDbType dbtype, ELocation location)
175 : m_ObjMgr(CObjectManager::GetInstance())
176 {
177  x_InitBlastDatabaseDataLoader(dbname, dbtype, location);
179 }
180 
181 void
183 {
184  try {
185  CRef<CReader> reader(new CId2Reader);
186  reader->SetPreopenConnection(false);
189  .GetLoader()->GetName();
190  } catch (const CException& e) {
191  m_GbLoaderName.erase();
192  ERR_POST(Warning << e.GetMsg());
193  }
194 }
195 
196 void
198  EDbType dbtype,
200 {
201  try {
202  if (location == eLocal) {
204  (*m_ObjMgr, dbname, dbtype, true,
207  } else {
209  (*m_ObjMgr, dbname, dbtype, true,
212  }
213  } catch (const CSeqDBException& e) {
214 
215  // if the database isn't found, ignore the exception as the Genbank
216  // data loader will be the fallback (just issue a warning)
217 
218  if (e.GetMsg().find("No alias or index file found ") != NPOS) {
219  ERR_POST(Warning << e.GetMsg());
220  }
221 
222  }
223 }
224 
226 {
227  CRef<CScope> retval(new CScope(*m_ObjMgr));
228 
229  if (!m_BlastDbLoaderName.empty()) {
231  }
232  if (!m_GbLoaderName.empty()) {
233  retval->AddDataLoader(m_GbLoaderName, 2);
234  }
235  return retval;
236 }
237 
239 {
240  if (!m_BlastDbLoaderName.empty()) {
242  }
243 }
244 
245 }
246 
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:467
ncbi::CBlastDbDataLoader::EDbType EDbType
Defines Limits for the types used in NCBI C/C++ toolkit.
std::string m_GbLoaderName
CRef< CSeq_align_set > FlattenSeqAlignSet(const CSeq_align_set &sset)
unsigned int Uint4
Alias for unsigned int.
Definition: ncbitype.h:121
std::ofstream out("events_result.xml")
main entry point for tests
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1008
void SetPreopenConnection(bool preopen=true)
Definition: reader.cpp:205
CObjectManager –.
CSeqDBException.
Definition: seqdbcommon.hpp:63
void RevokeBlastDbDataLoader()
Removes the BLAST database data loader from the object manager.
#define ASSERT
macro for assert.
Definition: ncbi_std.h:105
STL namespace.
Tdata & Set(void)
Assign a value to data member.
Uint4 EndianIndependentBufferHash(const char *buffer, Uint4 byte_length, Uint4 swap_size, Uint4 hash_seed)
Endianness independent hash function.
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
Definition: random_gen.hpp:238
ncbi::CRef< ncbi::objects::CScope > NewScope()
Create a new scope with the default set to the BLAST database data loader for the BLAST database spec...
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:776
#define NPOS
Definition: ncbistr.hpp:130
int i
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
void x_InitBlastDatabaseDataLoader(const std::string &dbname, EDbType dbtype, ELocation location)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:243
objects::CSeq_id * GenerateRandomSeqid_Gi()
void SetAlignOption(int option)
Set functions.
Definition: showalign.hpp:283
std::string m_BlastDbLoaderName
Data loader implementation that uses the blast databases remotely.
list< CRef< CSeq_align > > Tdata
Uint4 end_value
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:185
Utility stuff for more convenient using of Boost.Test library.
void DisplaySeqalign(CNcbiOstream &out)
call this to display seqalign
Definition: showalign.cpp:1846
bool RevokeDataLoader(CDataLoader &loader)
Revoke previously registered data loader.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
CRandom::
Definition: random_gen.hpp:65
Magic spell ;-) needed for some weird compilers... very empiric.
Miscellaneous common-use basic types and functionality.
CException –.
Definition: ncbiexpt.hpp:709
const Tdata & Get(void) const
Get the member data.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6854
CScope –.
Definition: scope.hpp:90
Deprecated: use kPriority_Default instead.
char end_bytes[4]
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1153
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
#define _ASSERT
static const char location[]
Definition: config.c:97
Definition: _hash_fun.h:40
const string & GetMsg(void) const
Get message string.
Definition: ncbiexpt.cpp:443
ncbi::CRef< ncbi::objects::CObjectManager > m_ObjMgr
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
TLoader * GetLoader(void) const
Get pointer to the loader.
void PrintFormattedSeqAlign(ostream &out, const CSeq_align_set *sas, CScope &scope)
Modified on Wed Aug 16 05:50:14 2017 by modify_doxy.py rev. 533848