NCBI C++ ToolKit
ntscan_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: ntscan_unit_test.cpp 73100 2016-06-20 15:45:40Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Jason Papadopoulos
27 *
28 * File Description:
29 * Nucleotide subject scan unit tests
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
36 #include <corelib/ncbitime.hpp>
38 #include <objmgr/scope.hpp>
40 #include <objmgr/util/sequence.hpp>
41 
46 #include <serial/serial.hpp>
47 #include <serial/iterator.hpp>
48 #include <serial/objostr.hpp>
49 
51 #include "blast_objmgr_priv.hpp"
52 
60 
61 #include "test_objmgr.hpp"
62 
63 using namespace std;
64 using namespace ncbi;
65 using namespace ncbi::objects;
66 using namespace ncbi::blast;
67 
68 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
69  (!defined(NCBI_COMPILER_MIPSPRO)) )
// Discontiguous word templates used by the hit-checking test below.
// Each array is a 0/1 mask named template_<W>_<L>[_opt]: it has L entries
// (16, 18 or 21) of which exactly W (11 or 12) are 1. In ScanCheckHitsCore
// only the positions whose mask entry is 1 contribute a base to the packed
// query/subject word. The "_opt" arrays are used as the second template when
// two templates are in effect and as the only template in the cases that
// select them directly.
70 static Uint1 template_11_16[] = {1,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1};
71 static Uint1 template_11_18[] = {1,0,1,1,0,1,1,0,0,1,0,1,1,0,1,1,0,1};
72 static Uint1 template_11_21[] = {1,0,0,1,0,1,1,0,0,1,0,1,1,0,0,1,0,1,1,0,1};
73 static Uint1 template_11_16_opt[] = {1,1,1,0,0,1,0,1,1,0,1,1,0,1,1,1};
74 static Uint1 template_11_18_opt[] = {1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,1,1,1};
75 static Uint1 template_11_21_opt[] = {1,1,1,0,1,0,0,1,0,1,0,0,0,1,0,0,1,0,1,1,1};
76 
// Same layout as above, with 12 selected positions per template.
77 static Uint1 template_12_16[] = {1,1,1,1,1,0,1,1,0,1,1,0,1,1,0,1};
78 static Uint1 template_12_18[] = {1,0,1,1,0,1,1,0,1,1,0,1,1,0,1,1,0,1};
79 static Uint1 template_12_21[] = {1,0,0,1,0,1,1,0,1,1,0,1,1,0,0,1,0,1,1,0,1};
80 static Uint1 template_12_16_opt[] = {1,1,1,0,1,1,0,1,1,0,1,1,0,1,1,1};
81 static Uint1 template_12_18_opt[] = {1,1,1,0,1,0,1,1,0,0,1,0,1,1,0,1,1,1};
82 static Uint1 template_12_21_opt[] = {1,1,1,0,1,0,0,1,0,1,1,0,0,1,0,0,1,0,1,1,1};
83 
// GI numbers of the sequences used by the tests. TINY/SM/MED/LG are passed
// as the query GI by the DECLARE_TEST invocations at the end of the file;
// SUBJECT_GI is the subject loaded by SetUpQuerySubjectAndLUT.
// NOTE(review): the names suggest increasing query length -- the actual
// lengths are not visible here; confirm against the sequence database.
84 #define TINY_GI 80982444
85 #define SM_GI 1945386
86 #define MED_GI 19572546
87 #define LG_GI 39919153
88 #define SUBJECT_GI 39103910
89 
// Byte value SetUpQuery expects immediately before and after the query
// bases (and never at the first or last base itself).
90 #define NULL_NUCL_SENTINEL 0xf
91 
92 struct TestFixture {
104 
// NOTE(review): the constructor's signature line and the member declarations
// that precede it are missing from this extract; presumably this is the body
// of TestFixture() -- confirm against the repository copy.
// Starts every fixture pointer out as NULL (so the TearDown* helpers can tell
// what was actually allocated) and selects blastn as the program type.
106  query_blk = NULL;
107  query_info = NULL;
108  sbp = NULL;
109  subject_blk = NULL;
110  lookup_segments = NULL;
111  lookup_wrap_ptr = NULL;
112  offset_pairs = NULL;
113  program_number = eBlastTypeBlastn;
114  word_options = NULL;
115  hitsaving_options = NULL;
116  ext_options = NULL;
117  }
118 
// Loads the query sequence identified by query_gi on the requested strand and
// fills in the fixture members query_blk, query_info and lookup_segments.
//
// query_gi  GI number; formatted into the Seq-id string "gi|<n>".
// strand    eNa_strand_both indexes two strands as separate segments; any
//           other value is handled as a single segment covering the block.
//
// NOTE(review): source lines 129, 131, 154 and 157 are missing from this
// extract (the declaration that receives GetSequence()'s result, one
// GetSequence() argument, and the right-hand sides of two sentinel checks).
119  void SetUpQuery(Uint4 query_gi, ENa_strand strand)
120  {
121  char buf[64];
122  Int4 status;
123  // load the query
     // NOTE(review): query_gi is a Uint4 but is printed with "%d"; "%u" would
     // match the argument type.
124  sprintf(buf, "gi|%d", query_gi);
125  CSeq_id id(buf);
126 
127  auto_ptr<SSeqLoc> ssl(CTestObjMgr::Instance().CreateSSeqLoc(id,strand));
128 
130  GetSequence(*ssl->seqloc,
132  ssl->scope,
133  strand,
134  eSentinels));
135 
136  // create the sequence block. The size to pass in
137  // must not include the sentinel bytes on either
138  // end of the sequence
139 
140  query_blk = NULL;
141  status = BlastSeqBlkNew(&query_blk);
142  BOOST_REQUIRE_EQUAL(0, status);
     // release() hands the buffer to BlastSeqBlkSetSequence; the "- 2" drops
     // the two sentinel bytes from the reported length.
143  status = BlastSeqBlkSetSequence(query_blk, sequence.data.release(),
144  sequence.length - 2);
145  BOOST_REQUIRE_EQUAL(0, status);
146 
147  BOOST_REQUIRE(query_blk != NULL);
148  BOOST_REQUIRE(query_blk->sequence != NULL);
149  BOOST_REQUIRE(query_blk->length > 0);
150 
     // NOTE(review): the next check repeats the one on source line 147.
151  BOOST_REQUIRE(query_blk != NULL);
     // sequence[] must start and end on real bases, while sequence_start[]
     // must carry the sentinel one byte before and one byte after them.
152  BOOST_REQUIRE(query_blk->sequence[0] != NULL_NUCL_SENTINEL);
153  BOOST_REQUIRE(query_blk->sequence[query_blk->length - 1] !=
155  BOOST_REQUIRE(query_blk->sequence_start[0] == NULL_NUCL_SENTINEL);
156  BOOST_REQUIRE(query_blk->sequence_start[query_blk->length + 1] ==
158  BOOST_REQUIRE_EQUAL(0, (int)query_blk->num_seq_ranges);
159 
160  query_info = BlastQueryInfoNew(program_number, 1);
161 
162 
163  // indicate which regions of the query to index (handle
164  // both strands separately)
165 
166  if (strand == eNa_strand_both) {
     // The block is split into two equal halves with one position left out
     // between them (offset kStrandLength is in neither segment).
     // NOTE(review): presumably that position is the sentinel separating
     // the two strands -- confirm.
167  const int kStrandLength = (query_blk->length - 1)/2;
168  BlastSeqLocNew(&lookup_segments, 0, kStrandLength-1);
169  BlastSeqLocNew(&lookup_segments, kStrandLength + 1,
170  query_blk->length - 1);
171  query_info->contexts[0].query_offset = 0;
172  query_info->contexts[0].query_length = kStrandLength;
173  query_info->contexts[1].query_offset = kStrandLength + 1;
174  query_info->contexts[1].query_length = kStrandLength;
175  }
176  else {
177  BlastSeqLocNew(&lookup_segments, 0, query_blk->length - 1);
     // NOTE(review): this asserts on the enumerator constant, not on the
     // `strand` argument, so it does not check what was passed in;
     // presumably `strand == eNa_strand_plus` was intended.
178  BOOST_REQUIRE(eNa_strand_plus);
179  query_info->contexts[0].query_offset = 0;
180  query_info->contexts[0].query_length = query_blk->length;
     // The second context is given zero length and marked invalid.
181  query_info->contexts[1].query_offset = query_blk->length + 1;
182  query_info->contexts[1].query_length = 0;
183  query_info->contexts[1].is_valid = FALSE;
184  }
185  }
186 
// Loads the plus strand of the subject sequence identified by subject_gi,
// without sentinel bytes, and stores it in the fixture member subject_blk
// through BlastSeqBlkSetCompressedSequence.
//
// subject_gi  GI number; formatted into the Seq-id string "gi|<n>".
//
// NOTE(review): source line 203 (one GetSequence() argument) is missing from
// this extract.
187  void SetUpSubject(Uint4 subject_gi)
188  {
189  char buf[64];
190  Int4 status;
191 
192  // load the subject sequence in compressed format
193 
     // NOTE(review): subject_gi is a Uint4 but is printed with "%d"; "%u"
     // would match the argument type.
194  sprintf(buf, "gi|%d", subject_gi);
195  CSeq_id subject_id(buf);
196 
197  auto_ptr<SSeqLoc>
198  subject_ssl(CTestObjMgr::Instance().CreateSSeqLoc(subject_id,
199  eNa_strand_plus));
200 
201  SBlastSequence subj_sequence(
202  GetSequence(*subject_ssl->seqloc,
204  subject_ssl->scope,
205  eNa_strand_plus,
206  eNoSentinels));
207 
208  // create the sequence block. Retrieve the real
209  // sequence length separately, and verify that
210  // the number of bytes allocated by GetSequence()
211  // is sufficient to hold that many bases
212 
213  subject_blk = NULL;
214  status = BlastSeqBlkNew(&subject_blk);
215  BOOST_REQUIRE_EQUAL(0, status);
216  BOOST_REQUIRE(subject_blk != NULL);
     // The length in bases comes from the Seq-loc, not from the buffer size.
217  subject_blk->length = sequence::GetLength(*subject_ssl->seqloc,
218  subject_ssl->scope);
     // release() hands the buffer over; subj_sequence.length is still read
     // below for the size check.
219  status = BlastSeqBlkSetCompressedSequence(subject_blk,
220  subj_sequence.data.release());
221  BOOST_REQUIRE_EQUAL(0, status);
222  BOOST_REQUIRE(subject_blk->sequence != NULL);
223  BOOST_REQUIRE(subject_blk->length > 0);
224  BOOST_REQUIRE(subject_blk->length / COMPRESSION_RATIO <=
225  (Int4)subj_sequence.length);
226  BOOST_REQUIRE_EQUAL(0, (int)subject_blk->num_seq_ranges);
227  }
228 
229  void SetUpLookupTable(Boolean mb_lookup,
230  EDiscWordType disco_type,
231  Int4 disco_size,
232  Int4 word_size)
233  {
234  LookupTableOptions* lookup_options;
235  BlastScoringOptions* score_options;
236  Int4 status;
237 
238  // set lookup table options
239 
240  status = LookupTableOptionsNew(program_number, &lookup_options);
241  BOOST_REQUIRE_EQUAL(0, status);
242  status = BLAST_FillLookupTableOptions(lookup_options,
243  program_number,
244  mb_lookup, // megablast
245  0, // threshold
246  word_size); // word size
247  BOOST_REQUIRE_EQUAL(0, status);
248 
249  // get ready to fill in the scoring matrix
250 
251  status = BlastScoringOptionsNew(program_number, &score_options);
252  BOOST_REQUIRE_EQUAL(0, status);
253  status = BLAST_FillScoringOptions(score_options,
254  program_number,
255  FALSE, // greedy
256  -3, // penalty
257  1, // reward
258  NULL, // score matrix
259  BLAST_GAP_OPEN_NUCL, // gap open
260  BLAST_GAP_EXTN_NUCL // gap extend
261  );
262  BOOST_REQUIRE_EQUAL(0, status);
263 
264  // fill in the score block
265 
266  BOOST_REQUIRE(query_blk != NULL);
267  const double kScalingFactor = 1.0;
268  Blast_Message *blast_message = NULL;
269  status = BlastSetup_ScoreBlkInit(query_blk, query_info, score_options,
270  program_number, &sbp, kScalingFactor,
271  &blast_message, NULL);
272  BOOST_REQUIRE_EQUAL(0, status);
273  blast_message = Blast_MessageFree(blast_message);
274  BOOST_REQUIRE(blast_message == NULL);
275 
276  // set discontiguous megablast (if applicable)
277 
278  lookup_options->mb_template_length = disco_size;
279  lookup_options->mb_template_type = disco_type;
280 
281  // create the lookup table
282 
283  QuerySetUpOptions* query_options = NULL;
284  BlastQuerySetUpOptionsNew(&query_options);
285  status = LookupTableWrapInit(query_blk,
286  lookup_options,
287  query_options,
288  lookup_segments,
289  sbp,
290  &lookup_wrap_ptr,
291  NULL /* RPS Info */,
292  NULL,
293  NULL);
294  BOOST_REQUIRE_EQUAL(0, status);
295  BlastChooseNaExtend(lookup_wrap_ptr);
296  query_options = BlastQuerySetUpOptionsFree(query_options);
297  BOOST_REQUIRE(query_options == NULL);
298 
299  // create the hit collection arrays
300 
301  offset_pairs = (BlastOffsetPair*)malloc(
302  GetOffsetArraySize(lookup_wrap_ptr) *
303  sizeof(BlastOffsetPair));
304  BOOST_REQUIRE(offset_pairs != NULL);
305 
306  lookup_options = LookupTableOptionsFree(lookup_options);
307  BOOST_REQUIRE(lookup_options == NULL);
308  score_options = BlastScoringOptionsFree(score_options);
309  BOOST_REQUIRE(score_options == NULL);
310  BlastInitialWordOptionsNew(program_number, &word_options);
311  BlastExtensionOptionsNew(program_number, &ext_options, TRUE);
312  BlastHitSavingOptionsNew(program_number, &hitsaving_options, TRUE);
313  }
314 
316  EDiscWordType disco_type, Int4 disco_size, Int4 word_size)
317  {
     // NOTE(review): the first line of this signature (source line 315) is
     // missing from this extract; the DECLARE_TEST macro calls this helper as
     // SetUpQuerySubjectAndLUT(mb_lookup, gi, disco_type, disco_size,
     // word_size).
     // Convenience wrapper: plus-strand query `gi`, the fixed SUBJECT_GI
     // subject, then the lookup table. The query has to be loaded before the
     // lookup table because SetUpLookupTable requires query_blk.
318  SetUpQuery(gi, eNa_strand_plus);
319  SetUpSubject(SUBJECT_GI);
320  SetUpLookupTable(mb_lookup, disco_type, disco_size, word_size);
321  }
322 
324  {
     // NOTE(review): the signature line (source line 323) is missing from
     // this extract; presumably this is TearDownQuery(), which is called a
     // few lines further down.
     // Releases what SetUpQuery() created. Each pointer is reassigned from
     // its Free call so that a repeated teardown finds nothing left to free.
325  if (query_blk)
326  query_blk = BlastSequenceBlkFree(query_blk);
327  if (lookup_segments)
328  lookup_segments = BlastSeqLocFree(lookup_segments);
329  if (query_info)
330  query_info = BlastQueryInfoFree(query_info);
331  }
332 
334  {
     // NOTE(review): the signature line (source line 333) is missing from
     // this extract; presumably this is TearDownSubject(), which the
     // DiscontigTwoSubjects test calls between its two subjects.
     // Releases the subject block created by SetUpSubject(), if any.
335  if (subject_blk)
336  subject_blk = BlastSequenceBlkFree(subject_blk);
337  }
338 
340  {
     // NOTE(review): the signature line (source line 339) is missing from
     // this extract; presumably this is TearDownLookupTable().
     // Releases everything SetUpLookupTable() created: the lookup table, the
     // offset-pair buffer, the score block and the three option blocks.
     // The first two calls are made unconditionally.
     // NOTE(review): that relies on LookupTableWrapFree and sfree accepting
     // NULL -- confirm.
341  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
342  sfree(offset_pairs);
343  if (sbp)
344  sbp = BlastScoreBlkFree(sbp);
345  if (word_options)
346  word_options = BlastInitialWordOptionsFree(word_options);
347  if (hitsaving_options)
348  hitsaving_options = BlastHitSavingOptionsFree(hitsaving_options);
349  if (ext_options)
350  ext_options = BlastExtensionOptionsFree(ext_options);
351  }
352 
354  {
     // NOTE(review): the signature line (source line 353) is missing from
     // this extract; presumably this is the fixture destructor -- confirm.
     // Runs all three teardown helpers; each one skips or tolerates members
     // that were never set up.
355  TearDownQuery();
356  TearDownSubject();
357  TearDownLookupTable();
358  }
359 
// Runs one pass of the subject-scanning callback selected for the current
// lookup table and returns whatever that callback returns (the callers treat
// it as the number of hits written to offset_pairs).
//
// scan_range  two-element array handed to the callback as the range to scan.
//             NOTE(review): callers loop until scan_range[0] passes
//             scan_range[1], so the callback presumably advances
//             scan_range[0] -- confirm.
// max_hits    limit handed to the callback as the offset array size.
//
// NOTE(review): source lines 369 and 374 (the first halves of the mb_lt and
// na_lt declarations) are missing from this extract.
360  Int4 RunScanSubject(Int4 *scan_range,
361  Int4 max_hits)
362  {
     // Only the small-nucleotide and megablast table types are supported.
363  BOOST_REQUIRE(lookup_wrap_ptr->lut_type == eSmallNaLookupTable ||
364  lookup_wrap_ptr->lut_type == eMBLookupTable);
365 
     // Fetch the callback from the concrete table type; it is stored as a
     // void* and cast back to the function type.
     // NOTE(review): presumably the next call is what installs
     // scansub_callback -- confirm.
366  BlastChooseNucleotideScanSubject(lookup_wrap_ptr);
367  TNaScanSubjectFunction callback = NULL;
368  if (lookup_wrap_ptr->lut_type == eMBLookupTable) {
370  lookup_wrap_ptr->lut;
371  callback = (TNaScanSubjectFunction)mb_lt->scansub_callback;
372  }
373  else {
375  lookup_wrap_ptr->lut;
376  callback = (TNaScanSubjectFunction)na_lt->scansub_callback;
377  }
378  BOOST_REQUIRE(callback != NULL);
379  return callback(lookup_wrap_ptr, subject_blk,
380  offset_pairs, max_hits, scan_range);
381  }
382 
383  // Gets called first
// NOTE(review): the signature line (source line 384) is missing from this
// extract; the DECLARE_TEST macro calls this as
// ScanOffsetTestCore((EDiscWordType)d_type), and the body reads `disco_type`.
//
// Scans the whole subject in as many passes as it takes and checks, for each
// pass, that the reported offsets are in range and strictly ordered (see the
// comment inside the loop for the exact ordering rules).
385  {
386  Int4 query_bases, subject_bases;
387  Int4 scan_range[2];
388  Int4 bases_per_lut_word;
389  Int4 hits;
390  Uint4 last_s_off = 0;
391  BlastSmallNaLookupTable *na_lt = NULL;
392  BlastMBLookupTable *mb_lt = NULL;
393  Boolean discontig = FALSE;
394 
395  scan_range[0] = 0;
396 
397  BOOST_REQUIRE(query_blk != NULL);
398  BOOST_REQUIRE(subject_blk != NULL);
399  BOOST_REQUIRE(lookup_wrap_ptr != NULL);
400  BOOST_REQUIRE(offset_pairs != NULL);
401  BOOST_REQUIRE(lookup_segments != NULL);
402 
403  subject_bases = subject_blk->length;
404  query_bases = query_blk->length;
405 
     // The last scannable subject offset is the subject length minus the
     // word footprint: the template length for discontiguous megablast,
     // otherwise the lookup-table word length.
406  if (lookup_wrap_ptr->lut_type == eMBLookupTable) {
407  mb_lt = (BlastMBLookupTable *)lookup_wrap_ptr->lut;
408  bases_per_lut_word = mb_lt->lut_word_length;
409  discontig = mb_lt->discontiguous;
410  //mb_lt->scan_step = 1;
411 
412  if (discontig) {
413  scan_range[1] = subject_bases - mb_lt->template_length;
414  }
415  else {
416  scan_range[1] = subject_bases - bases_per_lut_word;
417  }
418  }
419  else {
420  na_lt = (BlastSmallNaLookupTable *)lookup_wrap_ptr->lut;
421  bases_per_lut_word = na_lt->lut_word_length;
422  scan_range[1] = subject_bases - bases_per_lut_word;
423  }
424 
425  while (scan_range[0] <= scan_range[1])
426  {
427  hits = RunScanSubject(scan_range,
428  GetOffsetArraySize(lookup_wrap_ptr));
429 
430  // check number of reported hits
431  BOOST_REQUIRE(hits <= GetOffsetArraySize(lookup_wrap_ptr));
432 
433  // verify that the first offsets in each
434  // list pick up where the last ScanSubject
435  // call left off, without repeated subject
436  // offsets
437 
     // NOTE(review): `continue` only terminates if the scan call above
     // moved scan_range[0] forward even though it found nothing.
438  if (!hits)
439  continue;
440 
     // NOTE(review): last_s_off == 0 doubles as "no previous pass", so the
     // check is also skipped when the previous pass ended at offset 0.
441  if (last_s_off)
442  BOOST_REQUIRE(offset_pairs[0].qs_offsets.s_off > last_s_off);
443 
444  // verify that
445  // - the offset recovered from the lookup table is in
446  // the interval [0,query_size-bases_per_word]
447  // - no query-subject pair is repeated. This involves
448  // verifying that subject offsets increase monotonically
449  // and, for equal subject offsets, the query offsets
450  // either increase (blastn) or decrease (megablast)
451  // monotonically
452  // Exception: discontiguous megablast with two templates
453  // is allowed to have nondecreasing query offsets
454 
     // NOTE(review): the loop starts at 1 because each hit is compared with
     // its predecessor; as a side effect the range checks below are never
     // applied to offset_pairs[0].
455  for (int i = 1; i < hits; i++)
456  {
457  BOOST_REQUIRE(offset_pairs[i].qs_offsets.q_off <=
458  (Uint4)(query_bases - bases_per_lut_word) &&
459  ((int)offset_pairs[i].qs_offsets.q_off) >= 0);
460  BOOST_REQUIRE(offset_pairs[i].qs_offsets.s_off <
461  (Uint4)subject_bases);
462 
463  if (offset_pairs[i].qs_offsets.s_off ==
464  offset_pairs[i-1].qs_offsets.s_off)
465  {
     // mb_lt is non-NULL exactly when the table is a megablast table.
466  if (mb_lt) {
467  if (disco_type != eMBWordTwoTemplates) {
468  BOOST_REQUIRE(offset_pairs[i].qs_offsets.q_off <
469  offset_pairs[i-1].qs_offsets.q_off);
470  }
471  }
472  else {
473  BOOST_REQUIRE(offset_pairs[i].qs_offsets.q_off >
474  offset_pairs[i-1].qs_offsets.q_off);
475  }
476  }
477  else
478  {
479  BOOST_REQUIRE(offset_pairs[i].qs_offsets.s_off >
480  offset_pairs[i-1].qs_offsets.s_off);
481  }
482  }
483 
     // Remember where this pass ended for the cross-pass check above.
484  last_s_off = offset_pairs[hits-1].qs_offsets.s_off;
485  }
486  }
487 
488  // Gets called third
// NOTE(review): the signature line (source line 489) is missing from this
// extract; the DECLARE_TEST macro calls this as ScanMaxHitsTestCore().
//
// Scans the whole subject twice -- once with the full offset array size and
// once with a much smaller hit limit -- and requires both runs to report the
// same total number of hits.
490  {
491  Int4 subject_bases;
492  Int4 hits, found_hits, expected_hits;
493  Int4 scan_range[2];
494  Int4 new_max_size;
495  BlastSmallNaLookupTable *na_lt = NULL;
496  BlastMBLookupTable *mb_lt = NULL;
497  Boolean discontig = FALSE;
498 
499  scan_range[0] = 0;
500  found_hits = expected_hits = 0;
501 
502  BOOST_REQUIRE(query_blk != NULL);
503  BOOST_REQUIRE(subject_blk != NULL);
504  BOOST_REQUIRE(lookup_wrap_ptr != NULL);
505  BOOST_REQUIRE(offset_pairs != NULL);
506  BOOST_REQUIRE(lookup_segments != NULL);
507 
508  subject_bases = subject_blk->length;
509 
     // Last scannable offset: subject length minus the template length
     // (discontiguous) or the lookup-table word length.
510  if (lookup_wrap_ptr->lut_type == eMBLookupTable) {
511  mb_lt = (BlastMBLookupTable *)lookup_wrap_ptr->lut;
512  discontig = mb_lt->discontiguous;
513  //mb_lt->scan_step = 1;
514 
515  if (discontig) {
516  scan_range[1] = subject_bases - mb_lt->template_length;
517  }
518  else {
519  scan_range[1] = subject_bases - mb_lt->lut_word_length;
520  }
521  }
522  else {
523  na_lt = (BlastSmallNaLookupTable *)lookup_wrap_ptr->lut;
524  scan_range[1] = subject_bases - na_lt->lut_word_length;
525  }
526 
     // Reference run: full-size offset array.
527  while (scan_range[0] <= scan_range[1])
528  {
529  hits = RunScanSubject(scan_range,
530  GetOffsetArraySize(lookup_wrap_ptr));
531  BOOST_REQUIRE(hits <= GetOffsetArraySize(lookup_wrap_ptr));
532  expected_hits += hits;
533  }
534 
535  // Verify that the number of collected hits does
536  // not change if the hit list size changes
537 
     // Second run from the start with one fifth of the array size, but never
     // less than the table's longest_chain.
     // NOTE(review): presumably a limit below longest_chain could not hold
     // all hits for a single subject offset -- confirm.
538  scan_range[0] = 0;
539  if (mb_lt)
540  new_max_size = MAX(GetOffsetArraySize(lookup_wrap_ptr)/5,
541  mb_lt->longest_chain);
542  else
543  new_max_size = MAX(GetOffsetArraySize(lookup_wrap_ptr)/5,
544  na_lt->longest_chain);
545 
546  while (scan_range[0] <= scan_range[1])
547  {
548  hits = RunScanSubject(scan_range,
549  new_max_size);
550  BOOST_REQUIRE(hits <= new_max_size);
551  found_hits += hits;
552  }
553 
554  BOOST_REQUIRE_EQUAL(found_hits, expected_hits);
555  }
556 
557  // Gets called second
// NOTE(review): the signature line (source line 558) is missing from this
// extract; the DECLARE_TEST macro calls this as
// ScanCheckHitsCore((EDiscWordType)d_type), and the body reads `disco_type`.
// Also missing: every `case` label of the switch below and the index
// expressions on source lines 685, 698 and 712.
//
// Scans the whole subject and, for every reported (query, subject) offset
// pair, rebuilds the word at both positions and requires them to be equal.
// With two discontiguous templates a match under either template is enough.
559  {
560  Int4 subject_bases;
561  Int4 hits, found_hits, expected_hits;
562  Int4 scan_range[2];
563  Int4 bases_per_lut_word;
564  BlastSmallNaLookupTable *na_lt = NULL;
565  BlastMBLookupTable *mb_lt = NULL;
566  Boolean discontig = FALSE;
567 
568  scan_range[0] = 0;
     // NOTE(review): found_hits is accumulated below but never checked, and
     // expected_hits is never used again in this function.
569  found_hits = expected_hits = 0;
570 
571  BOOST_REQUIRE(query_blk != NULL);
572  BOOST_REQUIRE(subject_blk != NULL);
573  BOOST_REQUIRE(lookup_wrap_ptr != NULL);
574  BOOST_REQUIRE(offset_pairs != NULL);
575  BOOST_REQUIRE(lookup_segments != NULL);
576 
577  subject_bases = subject_blk->length;
578 
     // Last scannable offset: subject length minus the template length
     // (discontiguous) or the lookup-table word length.
579  if (lookup_wrap_ptr->lut_type == eMBLookupTable) {
580  mb_lt = (BlastMBLookupTable *)lookup_wrap_ptr->lut;
581  bases_per_lut_word = mb_lt->lut_word_length;
582  discontig = mb_lt->discontiguous;
583  //mb_lt->scan_step = 1;
584 
585  if (discontig) {
586  scan_range[1] = subject_bases - mb_lt->template_length;
587  }
588  else {
589  scan_range[1] = subject_bases - bases_per_lut_word;
590  }
591  }
592  else {
593  na_lt = (BlastSmallNaLookupTable *)lookup_wrap_ptr->lut;
594  bases_per_lut_word = na_lt->lut_word_length;
595  scan_range[1] = subject_bases - bases_per_lut_word;
596  }
597 
598  while (scan_range[0] <= scan_range[1])
599  {
600  hits = RunScanSubject(scan_range,
601  GetOffsetArraySize(lookup_wrap_ptr));
602  BOOST_REQUIRE(hits <= GetOffsetArraySize(lookup_wrap_ptr));
603  found_hits += hits;
604 
605  for (int i = 0; i < hits; i++)
606  {
     // Words are packed two bits per base; the "2" variants hold the word
     // built with the second template.
607  Uint4 query_word = 0;
608  Uint4 query_word2 = 0;
609  Uint4 subject_word = 0;
610  Uint4 subject_word2 = 0;
611  Int4 s_index, s_byte;
612  Int4 j;
     // The query is read one base per byte starting at the hit's q_off.
613  Uint1 *q = query_blk->sequence +
614  offset_pairs[i].qs_offsets.q_off;
615 
616  if (discontig) {
617  Uint1 *disco_template = NULL;
618  Uint1 *disco_template2 = NULL;
619  Int4 template_size = 0;
620 
     // Pick the mask(s) for the table's template type. The branches that
     // set disco_template2 pair a base mask with its "_opt" variant; the
     // others use a single "_opt" mask.
     // NOTE(review): in the `default` branch template_size stays 0, so no
     // bases are compared and the equality check at the end passes
     // vacuously.
621  switch (mb_lt->template_type) {
623  disco_template = template_11_16;
624  disco_template2 = template_11_16_opt;
625  template_size = 16;
626  break;
628  disco_template = template_11_18;
629  disco_template2 = template_11_18_opt;
630  template_size = 18;
631  break;
633  disco_template = template_11_21;
634  disco_template2 = template_11_21_opt;
635  template_size = 21;
636  break;
638  disco_template = template_11_16_opt;
639  template_size = 16;
640  break;
642  disco_template = template_11_18_opt;
643  template_size = 18;
644  break;
646  disco_template = template_11_21_opt;
647  template_size = 21;
648  break;
650  disco_template = template_12_16;
651  disco_template2 = template_12_16_opt;
652  template_size = 16;
653  break;
655  disco_template = template_12_18;
656  disco_template2 = template_12_18_opt;
657  template_size = 18;
658  break;
660  disco_template = template_12_21;
661  disco_template2 = template_12_21_opt;
662  template_size = 21;
663  break;
665  disco_template = template_12_16_opt;
666  template_size = 16;
667  break;
669  disco_template = template_12_18_opt;
670  template_size = 18;
671  break;
673  disco_template = template_12_21_opt;
674  template_size = 21;
675  break;
676  default:
677  break;
678  }
679 
     // Walk the template footprint; only positions whose mask entry is 1
     // contribute. The subject base is extracted from its packed byte:
     // base k of a byte sits 2 * (COMPRESSION_RATIO - 1 - k) bits up.
680  s_index = offset_pairs[i].qs_offsets.s_off;
681  for (j = 0; j < template_size; j++, s_index++) {
682  if (disco_template[j] == 1) {
683  query_word = (query_word << 2) | q[j];
684  s_byte = subject_blk->sequence[ s_index /
686  subject_word = (subject_word << 2) |
687  ((s_byte >> (2 * (COMPRESSION_RATIO - 1 -
688  (s_index % COMPRESSION_RATIO)))) & 0x3);
689  }
690  }
691 
     // Same walk with the second mask when two templates are in effect.
692  if (disco_type == eMBWordTwoTemplates) {
693  s_index = offset_pairs[i].qs_offsets.s_off;
694  for (j = 0; j < template_size; j++, s_index++) {
695  if (disco_template2[j] == 1) {
696  query_word2 = (query_word2 << 2) | q[j];
697  s_byte = subject_blk->sequence[ s_index /
699  subject_word2 = (subject_word2 << 2) |
700  ((s_byte >> (2 * (COMPRESSION_RATIO - 1 -
701  (s_index % COMPRESSION_RATIO)))) & 0x3);
702  }
703  }
704  }
705  }
706  else {
707 
     // Contiguous word: every one of the bases_per_lut_word positions
     // contributes.
708  s_index = offset_pairs[i].qs_offsets.s_off;
709  for (j = 0; j < bases_per_lut_word; j++, s_index++) {
710  query_word = (query_word << 2) | q[j];
711  s_byte = subject_blk->sequence[ s_index /
713  subject_word = (subject_word << 2) |
714  ((s_byte >> (2 * (COMPRESSION_RATIO - 1 -
715  (s_index % COMPRESSION_RATIO)))) & 0x3);
716  }
717  }
718  if (disco_type == eMBWordTwoTemplates)
719  BOOST_REQUIRE(query_word == subject_word ||
720  query_word2 == subject_word2);
721  else
722  BOOST_REQUIRE_EQUAL(query_word, subject_word);
723  }
724  }
725  }
726 
727  // Called fourth
// NOTE(review): the signature line (source line 728) is missing from this
// extract; the DECLARE_TEST macro calls this as SkipMaskedRangesCore().
//
// Restricts the subject to a few explicit ranges, runs the nucleotide word
// finder, and requires every saved initial hit to start inside one of those
// ranges.
729  {
730  Int2 retval = 0;
     // NOTE(review): subject_blk is dereferenced here, before the NULL check
     // a few lines below.
731  const Int4 subject_bases = subject_blk->length;
732 
733  BOOST_REQUIRE(query_blk != NULL);
734  BOOST_REQUIRE(subject_blk != NULL);
735  BOOST_REQUIRE(lookup_wrap_ptr != NULL);
736  BOOST_REQUIRE(offset_pairs != NULL);
737  BOOST_REQUIRE(lookup_segments != NULL);
738 
     // Two real ranges plus an empty one located at the end of the subject.
     // NOTE(review): the literal bounds assume the subject is longer than
     // 1001 bases -- confirm for SUBJECT_GI.
739  SSeqRange ranges2scan[] = { {0, 501}, {700, 1001} , {subject_bases, subject_bases}};
740  const size_t kNumRanges = (sizeof(ranges2scan)/sizeof(*ranges2scan));
741  BlastSeqBlkSetSeqRanges(subject_blk, ranges2scan, kNumRanges, FALSE, eSoftSubjMasking);
742 
     // Build the parameter structures the word finder needs from the default
     // options created in SetUpLookupTable().
743  BlastHitSavingParameters* hit_params = NULL;
744  retval = BlastHitSavingParametersNew(program_number, hitsaving_options,
745  sbp, query_info, subject_bases,
746  0, &hit_params);
747  BOOST_REQUIRE_EQUAL(0, retval);
748 
749  BlastInitialWordParameters* word_params = NULL;
750  retval = BlastInitialWordParametersNew(program_number, word_options,
751  hit_params, lookup_wrap_ptr,
752  sbp, query_info, subject_bases,
753  &word_params);
754  BOOST_REQUIRE_EQUAL(0, retval);
755 
756  Blast_ExtendWord* ewp = NULL;
757  retval = BlastExtendWordNew(query_blk->length, word_params, &ewp);
758  BOOST_REQUIRE_EQUAL(0, retval);
759 
760  BlastInitHitList* init_hitlist = BLAST_InitHitListNew();
761  BlastUngappedStats ungapped_stats = {0,};
762  retval = BlastNaWordFinder(subject_blk, query_blk, query_info,
763  lookup_wrap_ptr, sbp->matrix->data,
764  word_params, ewp, offset_pairs,
765  GetOffsetArraySize(lookup_wrap_ptr),
766  init_hitlist, &ungapped_stats);
767  BOOST_REQUIRE_EQUAL(0, retval);
768 
769  // Now for the tests...
     // A hit is accepted when its subject offset lies in [left, right) of
     // any range; the empty third range can never accept a hit.
770  for (int i = 0; i < init_hitlist->total; i++) {
771  const BlastInitHSP& init_hsp = init_hitlist->init_hsp_array[i];
772  const Uint4 s_off = init_hsp.offsets.qs_offsets.s_off;
773  bool hit_found = FALSE;
774  for (size_t j = 0; j < kNumRanges; j++) {
775  if ( s_off >= (Uint4)ranges2scan[j].left &&
776  s_off < (Uint4)ranges2scan[j].right ) {
777  hit_found = TRUE;
778  break;
779  }
780  }
781  BOOST_REQUIRE( hit_found );
782  }
783 
     // NOTE(review): these releases are skipped if any BOOST_REQUIRE above
     // aborts the test case.
784  hit_params = BlastHitSavingParametersFree(hit_params);
785  BOOST_REQUIRE(hit_params == NULL);
786  word_params = BlastInitialWordParametersFree(word_params);
787  BOOST_REQUIRE(word_params == NULL);
788  ewp = BlastExtendWordFree(ewp);
789  BOOST_REQUIRE(ewp == NULL);
790  init_hitlist = BLAST_InitHitListFree(init_hitlist);
791  BOOST_REQUIRE(init_hitlist == NULL);
792  }
793 };
794 
795 BOOST_FIXTURE_TEST_SUITE( ntscan, TestFixture )
796 
797 BOOST_AUTO_TEST_CASE( DiscontigTwoSubjects )
798 {
799  Int4 subject_bases;
800  Int4 scan_range[2];
801  Int4 hits;
802  Int4 i;
803  BlastMBLookupTable *mb_lt = NULL;
804  const Int4 kWordSize = 12;
805  const Int4 kTemplateSize = 21;
806 
807  SetUpQuery(555, eNa_strand_both);
808  SetUpLookupTable(TRUE, eMBWordTwoTemplates,
809  kTemplateSize, kWordSize);
810  BOOST_REQUIRE(lookup_wrap_ptr->lut_type == eMBLookupTable);
811  mb_lt = (BlastMBLookupTable *)lookup_wrap_ptr->lut;
812  //mb_lt->scan_step = 1;
813 
814  SetUpSubject(313959);
815  scan_range[0] = 0;
816  subject_bases = subject_blk->length;
817  scan_range[1] = subject_bases - mb_lt->template_length;
818 
819  while (scan_range[0] <= scan_range[1])
820  {
821  hits = RunScanSubject(scan_range,
822  GetOffsetArraySize(lookup_wrap_ptr));
823  }
824 
825  TearDownSubject();
826  SetUpSubject(271065); // smaller subject sequence
827  scan_range[0] = 0;
828  subject_bases = subject_blk->length;
829  scan_range[1] = subject_bases - mb_lt->template_length;
830 
831  while (scan_range[0] <= scan_range[1])
832  {
833  hits = RunScanSubject(scan_range,
834  GetOffsetArraySize(lookup_wrap_ptr));
835 
836  // verify that none of the lookup table hits are 'reused'
837  // from the last subject sequence
838 
839  for (i = 0; i < hits; i++) {
840  BOOST_REQUIRE(offset_pairs[i].qs_offsets.s_off <
841  (Uint4)subject_bases);
842  }
843  }
844 }
845 
// Declares one Boost test case named <name>ScanOffsetSize<wordsize>.
//   name      prefix of the generated test-case name
//   gi        GI of the query sequence
//   d_size    discontiguous template length passed on to the lookup table
//   d_type    discontiguous template type; cast to EDiscWordType
//   wordsize  lookup word size; also pasted into the test-case name
// Each case builds the query, the fixed subject and a lookup table (always
// with the megablast flag TRUE) and then runs the four fixture checks in the
// order: offsets, hit contents, max-hits, masked ranges.
846 #define DECLARE_TEST(name, gi, d_size, d_type, wordsize) \
847 BOOST_AUTO_TEST_CASE( name##ScanOffsetSize##wordsize ) { \
848  SetUpQuerySubjectAndLUT(TRUE, gi, (EDiscWordType)d_type, d_size, wordsize);\
849  ScanOffsetTestCore((EDiscWordType)d_type); \
850  ScanCheckHitsCore((EDiscWordType)d_type); \
851  ScanMaxHitsTestCore(); \
852  SkipMaskedRangesCore(); \
853 }
854 
// Test cases with d_size = 0 and d_type = 0, grouped by query GI and swept
// over a range of word sizes.
// NOTE(review): these invocations end in ';' while the Disco_* ones below do
// not; the extra ';' is an empty declaration after the generated test case.
855 DECLARE_TEST(Tiny, TINY_GI, 0, 0, 4);
856 DECLARE_TEST(Tiny, TINY_GI, 0, 0, 5);
857 DECLARE_TEST(Tiny, TINY_GI, 0, 0, 6);
858 DECLARE_TEST(Tiny, TINY_GI, 0, 0, 7);
859 
860 DECLARE_TEST(Small, SM_GI, 0, 0, 6);
861 DECLARE_TEST(Small, SM_GI, 0, 0, 7);
862 DECLARE_TEST(Small, SM_GI, 0, 0, 8);
863 DECLARE_TEST(Small, SM_GI, 0, 0, 9);
864 DECLARE_TEST(Small, SM_GI, 0, 0, 10);
865 
866 DECLARE_TEST(Medium, MED_GI, 0, 0, 9);
867 DECLARE_TEST(Medium, MED_GI, 0, 0, 10);
868 DECLARE_TEST(Medium, MED_GI, 0, 0, 11);
869 DECLARE_TEST(Medium, MED_GI, 0, 0, 12);
870 DECLARE_TEST(Medium, MED_GI, 0, 0, 13);
871 DECLARE_TEST(Medium, MED_GI, 0, 0, 14);
872 DECLARE_TEST(Medium, MED_GI, 0, 0, 15);
873 DECLARE_TEST(Medium, MED_GI, 0, 0, 20);
874 
875 DECLARE_TEST(Large, LG_GI, 0, 0, 11);
876 DECLARE_TEST(Large, LG_GI, 0, 0, 12);
877 DECLARE_TEST(Large, LG_GI, 0, 0, 13);
878 DECLARE_TEST(Large, LG_GI, 0, 0, 15);
879 DECLARE_TEST(Large, LG_GI, 0, 0, 20);
880 DECLARE_TEST(Large, LG_GI, 0, 0, 25);
881 DECLARE_TEST(Large, LG_GI, 0, 0, 28);
882 DECLARE_TEST(Large, LG_GI, 0, 0, 33);
883 DECLARE_TEST(Large, LG_GI, 0, 0, 37);
884 
// Discontiguous test cases on the MED_GI query with word size 11: template
// lengths 16/18/21 for each of the coding, optimal and two-template types.
885 DECLARE_TEST(Disco_Coding_16_, MED_GI, 16, eMBWordCoding, 11)
886 DECLARE_TEST(Disco_Coding_18_, MED_GI, 18, eMBWordCoding, 11)
887 DECLARE_TEST(Disco_Coding_21_, MED_GI, 21, eMBWordCoding, 11)
888 DECLARE_TEST(Disco_Optimal_16_, MED_GI, 16, eMBWordOptimal, 11)
889 DECLARE_TEST(Disco_Optimal_18_, MED_GI, 18, eMBWordOptimal, 11)
890 DECLARE_TEST(Disco_Optimal_21_, MED_GI, 21, eMBWordOptimal, 11)
891 
892 DECLARE_TEST(Disco_2Templ_16_, MED_GI, 16, eMBWordTwoTemplates, 11)
893 DECLARE_TEST(Disco_2Templ_18_, MED_GI, 18, eMBWordTwoTemplates, 11)
894 DECLARE_TEST(Disco_2Templ_21_, MED_GI, 21, eMBWordTwoTemplates, 11)
895 
// Same discontiguous matrix with word size 12. The word size is part of the
// generated test name, so these do not clash with the cases above.
896 DECLARE_TEST(Disco_Coding_16_, MED_GI, 16, eMBWordCoding, 12)
897 DECLARE_TEST(Disco_Coding_18_, MED_GI, 18, eMBWordCoding, 12)
898 DECLARE_TEST(Disco_Coding_21_, MED_GI, 21, eMBWordCoding, 12)
899 DECLARE_TEST(Disco_Optimal_16_, MED_GI, 16, eMBWordOptimal, 12)
900 DECLARE_TEST(Disco_Optimal_18_, MED_GI, 18, eMBWordOptimal, 12)
901 DECLARE_TEST(Disco_Optimal_21_, MED_GI, 21, eMBWordOptimal, 12)
902 
903 DECLARE_TEST(Disco_2Templ_16_, MED_GI, 16, eMBWordTwoTemplates, 12)
904 DECLARE_TEST(Disco_2Templ_18_, MED_GI, 18, eMBWordTwoTemplates, 12)
905 DECLARE_TEST(Disco_2Templ_21_, MED_GI, 21, eMBWordTwoTemplates, 12)
906 
908 #endif
Int2 BlastHitSavingParametersNew(EBlastProgramType program_number, const BlastHitSavingOptions *options, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subject_length, Int4 compositionBasedStats, BlastHitSavingParameters **parameters)
Allocate memory and initialize the BlastHitSavingParameters structure.
BlastInitHitList * BLAST_InitHitListNew(void)
Allocate memory for the BlastInitHitList structure.
Definition: blast_extend.c:216
#define TINY_GI
Blast_ExtendWord * BlastExtendWordFree(Blast_ExtendWord *ewp)
Deallocate memory for the word extension structure.
Definition: blast_extend.c:203
Uint4 q_off
Query offset.
Definition: blast_def.h:143
Int4 total
Total number of hits currently saved.
Definition: blast_extend.h:159
char * buf
void ScanOffsetTestCore(EDiscWordType disco_type)
ELookupTableType lut_type
What kind of a lookup table it is?
Definition: lookup_wrap.h:51
Int4 template_length
Length of the discontiguous word template.
Int2 BlastSeqBlkSetCompressedSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence)
Stores the compressed nucleotide sequence in the sequence block structure for the subject sequence wh...
Definition: blast_util.c:167
Declares the CBl2Seq (BLAST 2 Sequences) class.
void SetUpLookupTable(Boolean mb_lookup, EDiscWordType disco_type, Int4 disco_size, Int4 word_size)
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
signed int Int4
Alias for signed int.
Definition: ncbitype.h:120
#define NULL_NUCL_SENTINEL
LookupTableWrap * LookupTableWrapFree(LookupTableWrap *lookup)
Deallocate memory for the lookup table.
Definition: lookup_wrap.c:197
SBlastScoreMatrix * matrix
scoring matrix data
Definition: blast_stat.h:185
unsigned int Uint4
Alias for unsigned int.
Definition: ncbitype.h:121
Structure to hold the initial HSP information.
Definition: blast_extend.h:150
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:95
BlastInitialWordOptions * BlastInitialWordOptionsFree(BlastInitialWordOptions *options)
Deallocate memory for BlastInitialWordOptions.
Int2 LookupTableWrapInit(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc)
Create the lookup table for all query words.
Definition: lookup_wrap.c:47
void * scansub_callback
function for scanning subject sequences
Int4 GetOffsetArraySize(LookupTableWrap *lookup)
Determine the size of the offsets arrays to be filled by the ScanSubject function.
Definition: lookup_wrap.c:255
static Uint1 template_12_18_opt[]
void SetUpQuery(Uint4 query_gi, ENa_strand strand)
BlastInitHSP * init_hsp_array
Array of offset pairs, possibly with scores.
Definition: blast_extend.h:161
void SetUpSubject(Uint4 subject_gi)
void TearDownLookupTable()
CConstRef< objects::CSeq_loc > seqloc
Seq-loc describing the sequence to use as query/subject to BLAST The types of Seq-loc currently suppo...
Definition: sseqloc.hpp:50
Nucleotide ungapped extension code.
Int4 lut_word_length
number of letters in a lookup table word
Int4 lut_word_length
Length in bases of a word indexed by the lookup table.
Options used when evaluating and saving hits These include: a.
void * lut
Pointer to the actual lookup table structure.
Definition: lookup_wrap.h:52
Structure used for scoring calculations.
Definition: blast_stat.h:177
static Uint1 template_11_18_opt[]
static Uint1 template_12_21[]
static Uint1 template_12_16[]
static Uint1 template_11_16_opt[]
Uint4 num_seq_ranges
Number of elements in seq_ranges.
Definition: blast_def.h:281
STL namespace.
BlastContextInfo * contexts
Information per context.
const NCBI_NS_NCBI::CEnumeratedTypeValues *ENUM_METHOD_NAME() ENa_strand(void)
Access to ENa_strand's attributes (values, names) as defined in spec.
#define NULL
Definition: ncbistd.hpp:225
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Definition: blast_filter.c:608
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte...
Definition: blast_def.h:244
megablast lookup table (includes both contiguous and discontiguous megablast)
Blast_Message * Blast_MessageFree(Blast_Message *blast_msg)
Deallocates message memory.
Definition: blast_message.c:80
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:115
Int2 BlastInitialWordParametersNew(EBlastProgramType program_number, const BlastInitialWordOptions *word_options, const BlastHitSavingParameters *hit_params, const LookupTableWrap *lookup_wrap, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Uint4 subject_length, BlastInitialWordParameters **parameters)
Allocate memory for BlastInitialWordParameters and set x_dropoff.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Definition: blast_filter.c:737
BlastInitialWordOptions * word_options
BLAST_SequenceBlk * subject_blk
int i
Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program, Boolean is_megablast, double threshold, Int4 word_size)
Allocate memory for lookup table options and fill with default values.
BlastQueryInfo * BlastQueryInfoFree(BlastQueryInfo *query_info)
Deallocate memory for query information structure.
void ScanCheckHitsCore(EDiscWordType disco_type)
Defines: CTimeFormat - storage class for time format.
Options required for setting up the query sequence.
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
Definition: blast_setup.hpp:65
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
Int2 BlastInitialWordOptionsNew(EBlastProgramType program, BlastInitialWordOptions **options)
Allocate memory for BlastInitialWordOptions and fill with default values.
unsigned char Uint1
Alias for unsigned char.
Definition: ncbitype.h:117
void SetUpQuerySubjectAndLUT(Boolean mb_lookup, Int4 gi, EDiscWordType disco_type, Int4 disco_size, Int4 word_size)
lookup table for blastn with small query
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Definition: blast_stat.c:965
Int2 BlastSetup_ScoreBlkInit(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastScoringOptions *scoring_options, EBlastProgramType program_number, BlastScoreBlk **sbpp, double scale_factor, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
Initializes the score block structure.
Definition: blast_setup.c:456
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
Definition: blast_def.h:141
#define DECLARE_TEST(name, gi, d_size, d_type, wordsize)
Int2 BlastExtendWordNew(Uint4 query_length, const BlastInitialWordParameters *word_params, Blast_ExtendWord **ewp_ptr)
Initializes the word extension structure.
Definition: blast_extend.c:110
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
BLAST_SequenceBlk * query_blk
BlastSeqLoc * lookup_segments
element_type * release(void)
Release will release ownership of pointer to caller.
Definition: ncbimisc.hpp:464
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
Utility stuff for more convenient using of Boost.Test library.
BlastHitSavingOptions * hitsaving_options
Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)
Allocate memory for QuerySetUpOptions and fill with default values.
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:99
#define BLAST_GAP_OPEN_NUCL
default gap open penalty (blastn)
Definition: blast_options.h:93
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
LookupTableWrap * lookup_wrap_ptr
Int2 BlastNaWordFinder(BLAST_SequenceBlk *subject, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, LookupTableWrap *lookup_wrap, Int4 **matrix, const BlastInitialWordParameters *word_params, Blast_ExtendWord *ewp, BlastOffsetPair *offset_pairs, Int4 max_hits, BlastInitHitList *init_hitlist, BlastUngappedStats *ungapped_stats)
Find all words for a given subject sequence and perform ungapped extensions, assuming ordinary blastn...
Definition: na_ungapped.c:1597
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm. ...
Definition: blast_program.h:72
void BlastChooseNaExtend(LookupTableWrap *lookup_wrap)
Choose the best routine to use for creating ungapped alignments.
Definition: na_ungapped.c:1791
Int4 mb_template_length
Length of the discontiguous words.
BOOST_AUTO_TEST_CASE(DiscontigTwoSubjects)
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
union BlastOffsetPair BlastOffsetPair
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
The Object manager core.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingParameters.
Uint4 s_off
Subject offset.
Definition: blast_def.h:144
EDiscWordType
General types of discontiguous word templates.
Structure to hold a message from the core of the BLAST engine.
Definition: blast_message.h:70
Int4 longest_chain
length of the longest chain on the backbone
BlastOffsetPair * offset_pairs
Int2 BLAST_FillScoringOptions(BlastScoringOptions *options, EBlastProgramType program, Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix, Int4 gap_open, Int4 gap_extend)
Fill non-default values in the BlastScoringOptions structure.
BlastScoreBlk * sbp
QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)
Deallocate memory for QuerySetUpOptions.
if(yy_accept[yy_current_state])
EDiscTemplateType template_type
Type of the discontiguous word template.
Magic spell ;-) needed for some weird compilers... very empiric.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
#define LG_GI
EBlastProgramType program_number
Boolean discontiguous
Are discontiguous words used?
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it...
#define COMPRESSION_RATIO
Compression ratio of nucleotide bases (4 bases in 1 byte)
Definition: blast_def.h:83
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Definition: blast_util.c:245
#define SM_GI
BlastOffsetPair offsets
Offsets in query and subject, or, in PHI BLAST, start and end of pattern in subject.
Definition: blast_extend.h:151
#define SUBJECT_GI
Int4 query_length
Length of this query, strand or frame.
Wrapper structure for different types of BLAST lookup tables.
Definition: lookup_wrap.h:50
void BlastChooseNucleotideScanSubject(LookupTableWrap *lookup_wrap)
Choose the most appropriate function to scan through nucleotide subject sequences.
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Auxiliary class to convert a string into an argument count and vector.
static Uint1 template_11_18[]
struct BlastOffsetPair::@6 qs_offsets
Query/subject offset pair.
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
Boolean is_valid
Determine if this context is valid or not.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Int2 BlastSeqBlkSetSeqRanges(BLAST_SequenceBlk *seq_blk, SSeqRange *seq_ranges, Uint4 num_seq_ranges, Boolean copy_seq_ranges, ESubjectMaskingType mask_type)
Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure.
Definition: blast_util.c:182
A structure containing two integers, used e.g.
Definition: blast_def.h:155
Structure for keeping initial word extension information.
Definition: blast_extend.h:109
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
static CTestObjMgr & Instance()
Definition: test_objmgr.cpp:71
Routines for creating nucleotide BLAST lookup tables.
BlastInitialWordParameters * BlastInitialWordParametersFree(BlastInitialWordParameters *parameters)
Deallocate memory for BlastInitialWordParameters.
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Definitions which are dependent on the NCBI C++ Object Manager.
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:92
BlastExtensionOptions * ext_options
void SkipMaskedRangesCore(void)
Options needed for initial word finding and processing.
Int4(* TNaScanSubjectFunction)(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *subject, BlastOffsetPair *NCBI_RESTRICT offset_pairs, Int4 max_hits, Int4 *scan_range)
Generic prototype for nucleotide subject scanning routines.
Definition: blast_nascan.h:43
Structure to hold a sequence.
Definition: blast_def.h:242
Int4 mb_template_type
Type of a discontiguous word template.
Structure to hold all initial HSPs for a given subject sequence.
Definition: blast_extend.h:158
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
Options needed to construct a lookup table. Also needed: query sequence and query length.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Definition: blast_setup.hpp:62
int ** data
actual scoring matrix data, stored in row-major form
Definition: blast_stat.h:140
void * scansub_callback
function for scanning subject sequences
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it...
Utilities initialize/setup BLAST.
static const char * kWordSize
TAutoUint1Ptr data
Sequence data.
Definition: blast_setup.hpp:64
Int4 RunScanSubject(Int4 *scan_range, Int4 max_hits)
Utility functions for lookup table generation.
static Uint1 template_12_18[]
static Uint1 template_11_21_opt[]
The query related information.
static Uint1 template_11_21[]
Structure containing hit counts from the ungapped stage of a BLAST search.
Do not use sentinel bytes.
Definition: blast_setup.hpp:95
Use sentinel bytes.
Definition: blast_setup.hpp:94
BlastQueryInfo * query_info
The lookup table structure used for Mega BLAST.
CRef< objects::CScope > scope
Scope where the sequence referenced can be found by the toolkit's object manager. ...
Definition: sseqloc.hpp:54
#define BLAST_GAP_EXTN_NUCL
default gap extension penalty (blastn)
Int4 longest_chain
Largest number of query positions for a given word.
void ScanMaxHitsTestCore(void)
Options used for gapped extension These include: a.
Lookup table structure for blastn searches with small queries.
static Uint1 template_11_16[]
signed short Int2
Alias for signed short.
Definition: ncbitype.h:118
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
Definition: blast_util.c:133
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
Definition: blast_util.c:147
Int4 length
Length of sequence.
Definition: blast_def.h:246
Routines for scanning nucleotide BLAST lookup tables.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
static Uint1 template_12_21_opt[]
BlastInitHitList * BLAST_InitHitListFree(BlastInitHitList *init_hitlist)
Free memory for the BlastInitList structure.
Definition: blast_extend.c:261
static Uint1 template_12_16_opt[]
Modified on Tue Jan 16 15:38:47 2018 by modify_doxy.py rev. 546573