68 for (
int i = query_info->
first_context; i <= query_info->last_context;
i++)
94 size_t* tot_length =
NULL,
95 vector<ENa_strand>* strands =
NULL,
102 retval.reserve(gi_length.size());
105 BOOST_REQUIRE(strands->size() == gi_length.size());
108 BOOST_REQUIRE(masks->size() == gi_length.size());
111 for (
size_t i = 0;
i < gi_length.size();
i++) {
116 loc->
SetInt().SetTo(gi_length[
i].second-1);
123 retval.push_back(
SSeqLoc(loc, &*scope));
125 *tot_length += gi_length[
i].second;
133 for (
size_t i = 0;
i < masks->size();
i++) {
138 if (single_query_masks.size() == 1) {
140 single_query_masks.front()->GetInterval();
158 static const int kDefaultIntValue = -1;
162 if (m_Config.
Empty()) {
169 const string fname(
"data/split_query.ini");
170 ifstream config_file(fname.c_str());
173 if (m_Config->
Empty()) {
174 throw runtime_error(
"Failed to read configuration file" +
198 BOOST_REQUIRE(seq_blk);
199 BOOST_REQUIRE(qinfo);
202 for (
int i = 0; gis[
i] != -1;
i++) {
207 queries.push_back(
SSeqLoc(loc, scope));
221 prog, strand_opt, msgs);
229 vector< vector<int> >& starting_chunks,
230 vector< vector<int> >& absolute_contexts,
231 vector< vector<size_t> >* context_offsets,
233 vector<ENa_strand>* query_strands =
NULL) {
236 BOOST_REQUIRE_EQUAL(gi_length.size(), query_strands->size());
246 BOOST_REQUIRE_EQUAL(num_chunks, nc);
251 if ( !query_strands ) {
267 for (
size_t chunk_num = 0; chunk_num < num_chunks; chunk_num++) {
269 vector<int>& st_chunks = starting_chunks[chunk_num];
270 for (
size_t context_in_chunk = 0;
271 context_in_chunk < st_chunks.size();
272 context_in_chunk++) {
274 os <<
"Starting chunks: ";
275 os <<
"Chunk " << chunk_num <<
", context " << context_in_chunk;
278 BOOST_REQUIRE_MESSAGE(st_chunks[context_in_chunk]==sc,os.str());
282 vector<int>& abs_ctxts = absolute_contexts[chunk_num];
283 for (
size_t context_in_chunk = 0;
284 context_in_chunk < abs_ctxts.size();
285 context_in_chunk++) {
287 os <<
"Absolute contexts: ";
288 os <<
"Chunk " << chunk_num <<
", context " << context_in_chunk;
292 BOOST_REQUIRE_MESSAGE(abs_ctxts[context_in_chunk]==abs_ctx,os.str());
297 if ( !context_offsets ) {
304 for (
size_t chunk_num = 0; chunk_num < num_chunks; chunk_num++) {
305 vector<size_t> test_ctx_off =
307 const vector<size_t>& ref_ctx_off = (*context_offsets)[chunk_num];
310 os <<
"Number of context offsets in chunk " << chunk_num;
311 BOOST_REQUIRE_MESSAGE(ref_ctx_off.size()==test_ctx_off.size(),os.str());
319 for (
size_t i = 0;
i < ref_ctx_off.size();
i++) {
320 size_t correction = ref_ctx_off[
i];
322 os <<
"Context correction in chunk " << chunk_num
323 <<
", context " <<
i <<
" value now " << test_ctx_off[
i]
324 <<
" not " << correction;
325 BOOST_REQUIRE_MESSAGE(correction==test_ctx_off[
i],os.str());
327 int absolute_context =
337 int num_bases2compare =
341 os <<
"Sequence data in chunk " << chunk_num
342 <<
", context " <<
i;
344 x_CmpSequenceData(&global_seq->
sequence[global_offset],
347 BOOST_REQUIRE_MESSAGE(rv,os.str());
362 for (
size_t i = 0;
i <
len;
i++) {
363 if (global[
i] != chunk[
i]) {
387 x_ReadQueryBoundsPerChunk(
kTestName, sqb, split_query_vector);
388 x_ValidateQuerySeqLocsPerChunk(splitter, split_query_vector);
400 vector< vector<size_t> > queries_per_chunk;
401 x_ReadVectorOfVectorsForTest(
kTestName,
"Queries", queries_per_chunk);
402 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
404 vector< vector<int> > ctxs_per_chunk;
405 x_ReadVectorOfVectorsForTest(
kTestName,
"Contexts", ctxs_per_chunk);
406 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
408 vector< vector<size_t> > ctx_offsets_per_chunk;
409 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
410 ctx_offsets_per_chunk);
411 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
413 vector<BlastQueryInfo*> split_query_info;
416 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
425 query_strands =
NULL)
428 gi_length.push_back(make_pair<int, size_t>(112258880, 362959));
429 gi_length.push_back(make_pair<int, size_t>(112253843, 221853));
430 gi_length.push_back(make_pair<int, size_t>(112193060, 194837));
431 gi_length.push_back(make_pair<int, size_t>(112193059, 204796));
433 BOOST_REQUIRE_EQUAL(gi_length.size(), query_strands->size());
443 if ( !query_strands ) {
452 x_ReadQueryBoundsPerChunk(
kTestName, sqb, split_query_vector);
453 x_ValidateQuerySeqLocsPerChunk(splitter, split_query_vector);
465 vector< vector<size_t> > queries_per_chunk;
466 x_ReadVectorOfVectorsForTest(
kTestName,
"Queries", queries_per_chunk);
467 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
469 vector< vector<int> > ctxs_per_chunk;
470 x_ReadVectorOfVectorsForTest(
kTestName,
"Contexts", ctxs_per_chunk);
471 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
473 vector< vector<size_t> > ctx_offsets_per_chunk;
474 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
475 ctx_offsets_per_chunk);
476 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
478 vector<BlastQueryInfo*> split_query_info;
481 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
490 const size_t kLength = 122347;
495 CreateSSeqLoc(*
id,
range, strand));
519 vector< vector<size_t> > queries_per_chunk;
520 x_ReadVectorOfVectorsForTest(
kTestName,
"Queries", queries_per_chunk);
521 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
523 vector< vector<int> > ctxs_per_chunk;
524 x_ReadVectorOfVectorsForTest(
kTestName,
"Contexts", ctxs_per_chunk);
525 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
527 vector< vector<size_t> > ctx_offsets_per_chunk;
528 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
529 ctx_offsets_per_chunk);
530 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
532 vector<BlastQueryInfo*> split_query_info;
535 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
544 query_strands =
NULL)
547 gi_length.push_back(make_pair<int, size_t>(112817621, 5567));
548 gi_length.push_back(make_pair<int, size_t>(112585373, 5987));
549 gi_length.push_back(make_pair<int, size_t>(112585216, 5531));
550 gi_length.push_back(make_pair<int, size_t>(112585119, 5046));
552 BOOST_REQUIRE_EQUAL(gi_length.size(), query_strands->size());
562 if ( !query_strands ) {
585 vector< vector<size_t> > queries_per_chunk;
586 x_ReadVectorOfVectorsForTest(
kTestName,
"Queries", queries_per_chunk);
587 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
589 vector< vector<int> > ctxs_per_chunk;
590 x_ReadVectorOfVectorsForTest(
kTestName,
"Contexts", ctxs_per_chunk);
591 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
593 vector< vector<size_t> > ctx_offsets_per_chunk;
594 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
595 ctx_offsets_per_chunk);
596 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
598 vector<BlastQueryInfo*> split_query_info;
601 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
620 size_t concatenated_query_length,
631 BOOST_REQUIRE(chunk_range.
NotEmpty());
633 if (chunk_range.
GetToOpen() > concatenated_query_length) {
634 chunk_range.
SetToOpen(concatenated_query_length);
654 split_query_vector.clear();
660 if (kNumChunks == kDefaultIntValue) {
661 throw runtime_error(
"Invalid number of chunks in " +
kTestName);
666 for (
int i = 0;
i < kNumChunks;
i++) {
669 const vector<size_t> kQueryIndices = sqb->GetQueryIndices(
i);
671 BOOST_REQUIRE( !kQueryIndices.empty() );
674 ITERATE(vector<size_t>, itr, kQueryIndices) {
676 out <<
"Query" << *itr;
679 os.str() +
out.str());
684 vector<size_t> query_data;
685 x_ParseConfigLine(
value, query_data);
686 BOOST_REQUIRE_MESSAGE((
size_t)3==query_data.size(),os.str() +
out.str());
689 sl->
SetInt().SetFrom(query_data[0]);
690 sl->
SetInt().SetTo(query_data[1]);
695 split_query_vector[
i]->AddQuery(bsq);
711 if (split_query_vector.empty()) {
716 os <<
"Different split query vector sizes";
718 BOOST_REQUIRE_MESSAGE(split_query_vector.size()==(
size_t)splitter->
m_NumChunks,os.str());
726 os <<
"Different split query vector sizes for chunk " <<
i;
727 BOOST_REQUIRE_MESSAGE(ref_qvector->
Size()==test_qvector->
Size(),os.str());
729 for (
size_t j = 0; j < ref_qvector->
Size(); j++) {
736 os <<
"Starting offset for query " << j <<
" in chunk " <<
i <<
" is now " << test_query_range.
GetFrom() <<
" and not " << ref_query_range.
GetFrom();
737 BOOST_REQUIRE_MESSAGE(ref_query_range.
GetFrom()==test_query_range.
GetFrom(),os.str());
739 os <<
"Ending offset for query " << j <<
" in chunk " <<
i <<
" is now " << test_query_range.
GetToOpen() <<
" and not " << ref_query_range.
GetTo();
740 BOOST_REQUIRE_MESSAGE(ref_query_range.
GetTo()==test_query_range.
GetToOpen(),os.str());
742 os <<
"Strand for query " << j <<
" in chunk " <<
i <<
" is now "
762 vector<BlastQueryInfo*>& retval)
764 ostringstream os, errors;
768 if (kNumChunks == kDefaultIntValue) {
769 throw runtime_error(
"Invalid number of chunks in " +
kTestName);
773 retval.reserve(kNumChunks);
776 for (
int i = 0;
i < kNumChunks;
i++) {
778 os <<
"BlastQueryInfo" <<
i <<
".";
779 const string kPrefix(os.str());
780 errors.str(
"Chunk ");
785 if (kNumQueries == kDefaultIntValue) {
786 string msg(
"Invalid BlastQueryInfo::num_queries in ");
787 msg +=
kTestName +
" or value not specified";
793 errors <<
"Failed to allocate BlastQueryInfo structure"
794 <<
" (Number of queries=" << kNumQueries <<
")";
795 BOOST_REQUIRE_MESSAGE(retval[
i],errors.str());
801 errors.str(
"Chunk ");
803 BOOST_REQUIRE_MESSAGE(retval[
i]->first_context >= 0,errors.str());
809 BOOST_REQUIRE_MESSAGE(retval[
i]->last_context >= 0,errors.str());
810 BOOST_REQUIRE_MESSAGE(retval[
i]->first_context <= retval[
i]->last_context,errors.str());
812 for (
int c = retval[
i]->first_context;
813 c <= retval[
i]->last_context;
817 errors <<
"Chunk " <<
i <<
", BlastQueryInfo::context " << c;
822 retval[
i]->contexts[c].query_offset =
824 "query_offset", kDefaultIntValue);
825 BOOST_REQUIRE_MESSAGE(retval[
i]->contexts[c].query_offset >= 0,
826 errors.str() +
" query_offset >= 0");
828 retval[
i]->contexts[c].query_length =
830 "query_length", kDefaultIntValue);
831 BOOST_REQUIRE_MESSAGE(retval[
i]->contexts[c].query_length >= 0,
832 errors.str() +
" query_length >= 0");
834 retval[
i]->contexts[c].eff_searchsp =
836 "eff_searchsp", kDefaultIntValue);
837 BOOST_REQUIRE_MESSAGE(retval[
i]->contexts[c].eff_searchsp >= 0,
838 errors.str() +
" eff_searchsp >= 0");
840 retval[
i]->contexts[c].length_adjustment =
842 "length_adjustment", kDefaultIntValue);
843 BOOST_REQUIRE_MESSAGE(retval[
i]->contexts[c].length_adjustment >= 0,
844 errors.str() +
" length_adjustment >= 0");
846 retval[
i]->contexts[c].query_index =
848 "query_index", kDefaultIntValue);
849 BOOST_REQUIRE_MESSAGE(retval[
i]->contexts[c].query_index >= 0,
850 errors.str() +
" query_index");
852 retval[
i]->contexts[c].frame =
854 "frame", kDefaultIntValue);
855 BOOST_REQUIRE_MESSAGE(retval[
i]->contexts[c].frame == 1
856 || retval[
i]->contexts[c].frame == 2
857 || retval[
i]->contexts[c].frame == 3
858 || retval[
i]->contexts[c].frame == -1
859 || retval[
i]->contexts[c].frame == -2
860 || retval[
i]->contexts[c].frame == -3
861 || retval[
i]->contexts[c].frame == 0,
862 errors.str() +
" frame");
864 retval[
i]->contexts[c].is_valid =
867 BOOST_REQUIRE_MESSAGE(retval[
i]->contexts[c].
is_valid,
868 errors.str() +
" is_valid");
884 const char* data_to_read,
885 vector< vector<T> >& retval)
891 if (kNumChunks == kDefaultIntValue) {
892 throw runtime_error(
"Invalid number of chunks in " +
kTestName);
896 retval.resize(kNumChunks);
898 for (
int i = 0;
i < kNumChunks;
i++) {
900 os <<
"Chunk" <<
i << data_to_read;
903 x_ParseConfigLine(
value, retval[
i]);
915 vector<string> tokens;
917 retval.reserve(tokens.size());
918 ITERATE(vector<string>, token, tokens) {
940 for (
size_t i = 0;
i < kNumChunks;
i++) {
941 x_ComputeQueryChunkBounds(expected_chunk_range,
kChunkSize,
942 kQuerySize, kQueryChunkOverlapSize);
944 BOOST_REQUIRE_EQUAL(expected_chunk_range.
GetFrom(),
946 BOOST_REQUIRE_EQUAL(expected_chunk_range.
GetToOpen(),
952 BOOST_REQUIRE_EQUAL(expected_chunk_range.
GetFrom(), chunk_start);
953 BOOST_REQUIRE_EQUAL(expected_chunk_range.
GetToOpen(), chunk_end);
954 TSeqPos chunk_length = chunk_end - chunk_start;
955 BOOST_REQUIRE_EQUAL(chunk_length,
966 const vector< vector<size_t> >&
970 BOOST_REQUIRE_EQUAL(kNumChunks, queries_per_chunk.size());
972 for (
size_t i = 0;
i < kNumChunks;
i++) {
974 os <<
"Chunk number " <<
i <<
" has an invalid number of queries";
977 BOOST_REQUIRE_MESSAGE(queries_per_chunk[
i].
size()==data2test.size(),os.str());
979 for (
size_t j = 0; j < data2test.size(); j++) {
981 os <<
"Query index mismatch in chunk number " <<
i
982 <<
" entry number " << j;
983 BOOST_REQUIRE_MESSAGE(queries_per_chunk[
i][j]==data2test[j],os.str());
992 const vector< vector<int> >&
997 BOOST_REQUIRE_EQUAL(kNumChunks, contexts_per_chunk.size());
998 for (
size_t i = 0;
i < kNumChunks;
i++) {
1000 os <<
"Chunk number " <<
i <<
" has an invalid number of contexts";
1003 BOOST_REQUIRE_MESSAGE(contexts_per_chunk[
i].
size()==data2test.size(),os.str());
1005 for (
size_t j = 0; j < data2test.size(); j++) {
1007 os <<
"Context index mismatch in chunk number " <<
i
1008 <<
" entry number " << j;
1009 BOOST_REQUIRE_MESSAGE(contexts_per_chunk[
i][j]==data2test[j],os.str());
1018 const vector< vector<size_t> >&
1019 contexts_offsets_per_chunk)
1022 BOOST_REQUIRE_EQUAL(kNumChunks, contexts_offsets_per_chunk.size());
1023 for (
size_t i = 0;
i < kNumChunks;
i++) {
1025 os <<
"Chunk number " <<
i
1026 <<
" has an invalid number of context offsets";
1029 BOOST_REQUIRE_MESSAGE(contexts_offsets_per_chunk[
i].
size()==data2test.size(),os.str());
1031 for (
size_t j = 0; j < data2test.size(); j++) {
1033 os <<
"Context offset mismatch in chunk number " <<
i
1034 <<
" entry number " << j <<
" value now " << data2test[j]
1035 <<
" not " << contexts_offsets_per_chunk[
i][j];
1037 BOOST_REQUIRE_MESSAGE(contexts_offsets_per_chunk[
i][j]==data2test[j],os.str());
1055 os <<
"Chunk " << chunk_num <<
": BlastQueryInfo::first_context";
1059 os <<
"Chunk " << chunk_num <<
": BlastQueryInfo::last_context";
1060 BOOST_REQUIRE_MESSAGE(reference->
last_context==
test->last_context,os.str());
1063 os <<
"Chunk " << chunk_num <<
": BlastQueryInfo::num_queries";
1064 BOOST_REQUIRE_MESSAGE(reference->
num_queries==
test->num_queries,os.str());
1067 os <<
"Chunk " << chunk_num <<
": BlastQueryInfo::max_length";
1068 BOOST_REQUIRE_MESSAGE(reference->
max_length==
test->max_length,os.str());
1071 os <<
"Chunk " << chunk_num <<
": BlastQueryInfo::pattern_info";
1072 BOOST_REQUIRE_MESSAGE(reference->
pattern_info==
test->pattern_info,os.str());
1075 ctx <= reference->last_context;
1079 os <<
"Chunk " << chunk_num <<
", context " <<
ctx;
1081 os.str() +
" query_offset");
1083 os.str() +
" query_length");
1085 os.str() +
" eff_searchsp");
1087 os.str() +
" query_index");
1089 os.str() +
" frame");
1091 os.str() +
" is_valid");
1104 vector<BlastQueryInfo*>
1105 split_query_info_structs)
1108 BOOST_REQUIRE(options);
1112 BOOST_REQUIRE_EQUAL(kNumChunks, split_query_info_structs.size());
1114 for (
size_t i = 0;
i < kNumChunks;
i++) {
1116 os <<
"Chunk " <<
i <<
": ";
1118 BOOST_REQUIRE_MESSAGE(qf.
NotEmpty(),os.str() +
"NULL query factory");
1120 BOOST_REQUIRE_MESSAGE(qd.
NotEmpty(),os.str() +
"NULL local query data");
1122 os <<
"Different number of queries";
1155 vector<Int4> query_indices_expected;
1156 query_indices_expected.push_back(45);
1157 query_indices_expected.push_back(0);
1158 query_indices_expected.push_back(7);
1160 ITERATE(vector<Int4>, qi, query_indices_expected) {
1162 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1164 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1170 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1172 BOOST_REQUIRE_EQUAL(query_indices_expected[
i],
1173 (
Int4)query_indices[
i]);
1175 sfree(query_indices);
1178 Uint4 num_query_contexts = 0;
1181 &num_query_contexts);
1182 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1183 for (
Uint4 i = 0;
i < num_query_contexts;
i++) {
1184 BOOST_REQUIRE_EQUAL(query_indices_expected[
i], query_contexts[
i]);
1186 sfree(query_contexts);
1188 size_t num_queries(0);
1191 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1192 BOOST_REQUIRE_EQUAL(query_indices_expected.size(), num_queries);
1199 map.resize(kNumChunks);
1200 Uint4 query_index = 0;
1203 for (
Uint4 chunk_num = 0; chunk_num < kNumChunks; chunk_num++) {
1205 for (
Uint4 i = 0;
i < kQueriesPerChunk;
i++) {
1206 map[chunk_num].push_back(query_index++);
1212 for (
size_t chunk_num = 0; chunk_num < map.size(); chunk_num++) {
1213 ITERATE(vector<Uint4>, qi, map[chunk_num]) {
1216 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1220 for (
Uint4 chunk_num = 0; chunk_num < kNumChunks; chunk_num++) {
1221 vector<Uint4> query_indices_expected = map[chunk_num];
1227 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1228 BOOST_REQUIRE(query_indices !=
NULL);
1231 for (
i = 0;
i < query_indices_expected.size();
i++) {
1232 BOOST_REQUIRE_EQUAL(query_indices_expected[
i],
1236 sfree(query_indices);
1238 size_t num_queries(0);
1241 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1242 BOOST_REQUIRE_EQUAL(query_indices_expected.size(), num_queries);
1247 const Uint4 kNumChunks = 3;
1249 map.resize(kNumChunks);
1250 map[0].push_back(0);
1251 map[0].push_back(1);
1252 map[1].push_back(2);
1253 map[2].push_back(3);
1257 for (
Uint4 chunk_num = 0; chunk_num < map.size(); chunk_num++) {
1258 ITERATE(vector<Uint4>, qi, map[chunk_num]) {
1261 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1265 for (
Uint4 chunk_num = 0; chunk_num < kNumChunks; chunk_num++) {
1266 vector<Uint4> query_indices_expected = map[chunk_num];
1272 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1273 BOOST_REQUIRE(query_indices !=
NULL);
1276 for (
i = 0;
i < query_indices_expected.size();
i++) {
1277 BOOST_REQUIRE_EQUAL(query_indices_expected[
i],
1281 sfree(query_indices);
1283 size_t num_queries(0);
1286 BOOST_REQUIRE_EQUAL((
Int2)0, rv);
1287 BOOST_REQUIRE_EQUAL(query_indices_expected.size(), num_queries);
1296 kTestName(
"QuerySplitter_BlastnSingleQueryMultiChunk_BothStrands");
1306 kTestName(
"QuerySplitter_BlastnSingleQueryMultiChunk_PlusStrand");
1316 kTestName(
"QuerySplitter_BlastnSingleQueryMultiChunk_MinusStrand");
1326 kTestName(
"QuerySplitter_BlastnMultiQueryMultiChunk_PlusStrand");
1336 kTestName(
"QuerySplitter_BlastnMultiQueryMultiChunk_MinusStrand");
1346 kTestName(
"QuerySplitter_BlastnMultiQueryMultiChunk_BothStrands");
1355 kTestName(
"QuerySplitter_BlastnMultiQueryMultiChunk_MixedStrands");
1356 vector<ENa_strand> query_strands;
1357 query_strands.reserve(4);
1363 QuerySplitter_BlastnMultiQueryMultiChunk(
kTestName,
1373 kTestName(
"QuerySplitter_BlastxSingleQueryMultiChunk_BothStrands");
1381 kTestName(
"QuerySplitter_BlastxSingleQueryMultiChunk_PlusStrand");
1389 kTestName(
"QuerySplitter_BlastxSingleQueryMultiChunk_MinusStrand");
1398 kTestName(
"QuerySplitter_BlastxMultiQueryMultiChunk_PlusStrand");
1407 kTestName(
"QuerySplitter_BlastxMultiQueryMultiChunk_MinusStrand");
1416 kTestName(
"QuerySplitter_BlastxMultiQueryMultiChunk_BothStrands");
1423 kTestName(
"QuerySplitter_BlastxMultiQueryMultiChunk_MixedStrands");
1424 vector<ENa_strand> query_strands;
1425 query_strands.reserve(4);
1439 const string kTestName(
"QuerySplitter_BlastpSingleQueryMultiChunk");
1441 const size_t kLength = 33423;
1454 BOOST_REQUIRE_EQUAL(m_Config->GetInt(
kTestName,
"ChunkSize",
1459 x_ReadQueryBoundsPerChunk(
kTestName, sqb, split_query_vector);
1460 x_ValidateQuerySeqLocsPerChunk(splitter, split_query_vector);
1467 const size_t kNumChunks = (size_t)m_Config->GetInt(
kTestName,
1473 vector< vector<size_t> > queries_per_chunk;
1474 x_ReadVectorOfVectorsForTest(
kTestName,
"Queries", queries_per_chunk);
1475 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
1477 vector< vector<int> > ctxs_per_chunk;
1478 x_ReadVectorOfVectorsForTest(
kTestName,
"Contexts", ctxs_per_chunk);
1479 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
1481 vector< vector<size_t> > ctx_offsets_per_chunk;
1482 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1483 ctx_offsets_per_chunk);
1484 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
1486 vector<BlastQueryInfo*> split_query_info;
1489 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
1497 const string kTestName(
"QuerySplitter_BlastpMultiQueryMultiChunk");
1500 gi_length.push_back(make_pair<int, size_t>(33624848, 6883));
1501 gi_length.push_back(make_pair<int, size_t>(4758794, 6669));
1502 gi_length.push_back(make_pair<int, size_t>(66821305, 6061));
1503 gi_length.push_back(make_pair<int, size_t>(109075552, 5007));
1517 BOOST_REQUIRE_EQUAL(m_Config->GetInt(
kTestName,
"ChunkSize",
1522 x_ReadQueryBoundsPerChunk(
kTestName, sqb, split_query_vector);
1523 x_ValidateQuerySeqLocsPerChunk(splitter, split_query_vector);
1530 const size_t kNumChunks = (size_t)m_Config->GetInt(
kTestName,
1536 vector< vector<size_t> > queries_per_chunk;
1537 x_ReadVectorOfVectorsForTest(
kTestName,
"Queries", queries_per_chunk);
1538 x_ValidateQueriesPerChunkAssignment(*sqb, queries_per_chunk);
1540 vector< vector<int> > ctxs_per_chunk;
1541 x_ReadVectorOfVectorsForTest(
kTestName,
"Contexts", ctxs_per_chunk);
1542 x_ValidateQueryContextsPerChunkAssignment(*sqb, ctxs_per_chunk);
1544 vector< vector<size_t> > ctx_offsets_per_chunk;
1545 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1546 ctx_offsets_per_chunk);
1547 x_ValidateContextOffsetsPerChunkAssignment(*sqb, ctx_offsets_per_chunk);
1549 vector<BlastQueryInfo*> split_query_info;
1552 x_ValidateLocalQueryData(splitter, &*opts, split_query_info);
1562 kTestName(
"TestCContextTranslator_BlastnMultiQuery_BothStrands");
1564 gi_length.push_back(make_pair<int, size_t>(107784911, 1000));
1565 gi_length.push_back(make_pair<int, size_t>(115354032, 250));
1566 gi_length.push_back(make_pair<int, size_t>(115381005, 2551));
1569 const size_t num_chunks = 9;
1571 vector< vector<int> > starting_chunks(num_chunks);
1572 vector< vector<int> > absolute_contexts(num_chunks);
1573 vector< vector<size_t> > context_offset_corrections(num_chunks);
1575 x_ReadVectorOfVectorsForTest(
kTestName,
"StartingChunks",
1577 x_ReadVectorOfVectorsForTest(
kTestName,
"AbsoluteContexts",
1579 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1580 context_offset_corrections);
1583 starting_chunks, absolute_contexts,
1584 &context_offset_corrections,
1592 kTestName(
"TestCContextTranslator_BlastnMultiQuery_PlusStrand");
1594 gi_length.push_back(make_pair<int, size_t>(107784911, 1000));
1595 gi_length.push_back(make_pair<int, size_t>(115354032, 250));
1596 gi_length.push_back(make_pair<int, size_t>(115381005, 2551));
1599 const size_t num_chunks = 9;
1601 vector< vector<int> > starting_chunks(num_chunks);
1602 vector< vector<int> > absolute_contexts(num_chunks);
1603 vector< vector<size_t> > context_offset_corrections(num_chunks);
1605 x_ReadVectorOfVectorsForTest(
kTestName,
"StartingChunks",
1607 x_ReadVectorOfVectorsForTest(
kTestName,
"AbsoluteContexts",
1609 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1610 context_offset_corrections);
1613 starting_chunks, absolute_contexts,
1614 &context_offset_corrections,
1622 kTestName(
"TestCContextTranslator_BlastnMultiQuery_MinusStrand");
1624 gi_length.push_back(make_pair<int, size_t>(107784911, 1000));
1625 gi_length.push_back(make_pair<int, size_t>(115354032, 250));
1626 gi_length.push_back(make_pair<int, size_t>(115381005, 2551));
1629 const size_t num_chunks = 9;
1631 vector< vector<int> > starting_chunks(num_chunks);
1632 vector< vector<int> > absolute_contexts(num_chunks);
1633 vector< vector<size_t> > context_offset_corrections(num_chunks);
1635 x_ReadVectorOfVectorsForTest(
kTestName,
"StartingChunks",
1637 x_ReadVectorOfVectorsForTest(
kTestName,
"AbsoluteContexts",
1639 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1640 context_offset_corrections);
1643 starting_chunks, absolute_contexts,
1644 &context_offset_corrections,
1652 kTestName(
"TestCContextTranslator_BlastxSingleQuery_BothStrands_0");
1654 gi_length.push_back(make_pair<int, size_t>(116001669, 33));
1657 const size_t num_chunks = 3;
1660 vector< vector<int> > starting_chunks(num_chunks);
1661 vector< vector<int> > absolute_contexts(num_chunks);
1662 vector< vector<size_t> > context_offset_corrections(num_chunks);
1664 x_ReadVectorOfVectorsForTest(
kTestName,
"StartingChunks",
1666 x_ReadVectorOfVectorsForTest(
kTestName,
"AbsoluteContexts",
1668 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1669 context_offset_corrections);
1672 starting_chunks, absolute_contexts,
1673 &context_offset_corrections,
1682 kTestName(
"TestCContextTranslator_BlastxSingleQuery_BothStrands_1");
1684 gi_length.push_back(make_pair<int, size_t>(116001673, 34));
1687 const size_t num_chunks = 3;
1690 vector< vector<int> > starting_chunks(num_chunks);
1691 vector< vector<int> > absolute_contexts(num_chunks);
1692 vector< vector<size_t> > context_offset_corrections(num_chunks);
1694 x_ReadVectorOfVectorsForTest(
kTestName,
"StartingChunks",
1696 x_ReadVectorOfVectorsForTest(
kTestName,
"AbsoluteContexts",
1698 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1699 context_offset_corrections);
1702 starting_chunks, absolute_contexts,
1703 &context_offset_corrections,
1712 kTestName(
"TestCContextTranslator_BlastxSingleQuery_BothStrands_2");
1714 gi_length.push_back(make_pair<int, size_t>(116001668, 35));
1717 const size_t kNumChunks = m_Config->GetInt(
kTestName,
"NumChunks",
1721 vector< vector<int> > starting_chunks(kNumChunks);
1722 vector< vector<int> > absolute_contexts(kNumChunks);
1723 vector< vector<size_t> > context_offset_corrections(kNumChunks);
1725 x_ReadVectorOfVectorsForTest(
kTestName,
"StartingChunks",
1727 x_ReadVectorOfVectorsForTest(
kTestName,
"AbsoluteContexts",
1729 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1730 context_offset_corrections);
1733 starting_chunks, absolute_contexts,
1734 &context_offset_corrections,
1743 kTestName(
"TestCContextTranslator_BlastxMultiQuery_BothStrands");
1745 gi_length.push_back(make_pair<int, size_t>(107784911, 1000));
1746 gi_length.push_back(make_pair<int, size_t>(115354032, 250));
1747 gi_length.push_back(make_pair<int, size_t>(115381005, 2551));
1750 const size_t num_chunks = 10;
1752 vector< vector<int> > starting_chunks(num_chunks);
1753 vector< vector<int> > absolute_contexts(num_chunks);
1754 vector< vector<size_t> > context_offset_corrections(num_chunks);
1756 x_ReadVectorOfVectorsForTest(
kTestName,
"StartingChunks",
1758 x_ReadVectorOfVectorsForTest(
kTestName,
"AbsoluteContexts",
1760 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1761 context_offset_corrections);
1764 starting_chunks, absolute_contexts,
1765 &context_offset_corrections,
1771 kTestName(
"TestCContextTranslator_BlastxMultiQuery_PlusStrand");
1773 gi_length.push_back(make_pair<int, size_t>(107784911, 1000));
1774 gi_length.push_back(make_pair<int, size_t>(115354032, 250));
1775 gi_length.push_back(make_pair<int, size_t>(115381005, 2551));
1778 const size_t num_chunks = 10;
1780 vector< vector<int> > starting_chunks(num_chunks);
1781 vector< vector<int> > absolute_contexts(num_chunks);
1782 vector< vector<size_t> > context_offset_corrections(num_chunks);
1784 x_ReadVectorOfVectorsForTest(
kTestName,
"StartingChunks",
1786 x_ReadVectorOfVectorsForTest(
kTestName,
"AbsoluteContexts",
1788 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1789 context_offset_corrections);
1792 starting_chunks, absolute_contexts,
1793 &context_offset_corrections,
1799 kTestName(
"TestCContextTranslator_BlastxMultiQuery_MinusStrand");
1801 gi_length.push_back(make_pair<int, size_t>(107784911, 1000));
1802 gi_length.push_back(make_pair<int, size_t>(115354032, 250));
1803 gi_length.push_back(make_pair<int, size_t>(115381005, 2551));
1806 const size_t num_chunks = 10;
1808 vector< vector<int> > starting_chunks(num_chunks);
1809 vector< vector<int> > absolute_contexts(num_chunks);
1810 vector< vector<size_t> > context_offset_corrections(num_chunks);
1812 x_ReadVectorOfVectorsForTest(
kTestName,
"StartingChunks",
1814 x_ReadVectorOfVectorsForTest(
kTestName,
"AbsoluteContexts",
1816 x_ReadVectorOfVectorsForTest(
kTestName,
"ContextOffsets",
1817 context_offset_corrections);
1820 starting_chunks, absolute_contexts,
1821 &context_offset_corrections,
1830 const string kTestName(
"QuerySplitter_NoSplit");
1839 const size_t kNumChunks = m_Config->GetInt(
kTestName,
"NumChunks",
1844 BOOST_REQUIRE_EQUAL(m_Config->GetInt(
kTestName,
"ChunkSize",
1856 BOOST_REQUIRE(
false);
1857 }
catch (
const runtime_error&) {
1858 BOOST_REQUIRE(
true);
1863 BOOST_REQUIRE_EQUAL(qf, chunk_query_factory);
1871 gi_length.push_back(make_pair<int, size_t>(95116755, 35000));
1872 gi_length.push_back(make_pair<int, size_t>(112123020, 35580));
1882 const size_t kNumChunks(2);
1886 BOOST_REQUIRE(
false);
1887 }
catch (
const out_of_range& ) {
1888 BOOST_REQUIRE(
true);
1894 BOOST_REQUIRE(chunk_0 != qf);
1895 BOOST_REQUIRE(chunk_1 != qf);
1907 BOOST_REQUIRE_EQUAL(1055, retval);
1912 BOOST_REQUIRE_EQUAL(1, retval);
1918 BOOST_REQUIRE_EQUAL(3, retval);
1924 BOOST_REQUIRE_EQUAL(2, retval);
Auxiliary functions for BLAST.
#define sfree(x)
Safely free a pointer: belongs to a higher level header.
Definitions which are dependent on the NCBI C++ Object Manager.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
BlastQueryInfo * BlastQueryInfoFree(BlastQueryInfo *query_info)
Deallocate memory for query information structure.
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eTblastx
Translated nucl-Translated nucl.
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eBlastp
Protein-Protein.
@ eBlastx
Translated nucl-Protein.
CAutoEnvironmentVariable –.
Defines BLAST error codes (user errors included)
Encapsulates ALL the BLAST algorithm's options.
size_type Size() const
Returns the number of queries found in this query vector.
CConstRef< objects::CSeq_loc > GetQuerySeqLoc(size_type i) const
Get the query Seq-loc for a query by index.
Auxiliary class to provide convenient and efficient access to conversions between contexts local to q...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Class responsible for splitting query sequences and providing data to the BLAST search class to searc...
static CRef< CScope > NewScope(bool with_defaults=true)
Return a new scope, possibly (by default) with default loaders, which will include the Genbank loader...
Wrapper class around SSplitQueryBlk structure.
CRef< CNcbiRegistry > m_Config
This represents the split_query.ini configuration file.
void QuerySplitter_BlastxSingleQueryMultiChunk(const string &kTestName, ENa_strand strand)
void x_ValidateQueryContextsPerChunkAssignment(const CSplitQueryBlk &sqb, const vector< vector< int > > &contexts_per_chunk)
Validates the query contexts assigned to all the chunks.
void x_ValidateLocalQueryData(CRef< CQuerySplitter > splitter, const CBlastOptions *options, vector< BlastQueryInfo * > split_query_info_structs)
Validate the local query data for all chunks, comparing data produced by the CQuerySplitter object an...
void x_ReadVectorOfVectorsForTest(const string &kTestName, const char *data_to_read, vector< vector< T > > &retval)
This method reads entries in the config file of the format ChunkNX, where N is the chunk number and X ...
~CSplitQueryTestFixture()
void x_ValidateChunkBounds(size_t kChunkSize, size_t kQuerySize, const CSplitQueryBlk &sqb, EBlastProgramType p)
Auxiliary method to validate the chunk bounds calculated by the CSplitQueryBlk object and the x_Compu...
void x_ReadQueryBoundsPerChunk(const string &kTestName, CConstRef< CSplitQueryBlk > sqb, CQuerySplitter::TSplitQueryVector &split_query_vector)
This function reads values in the split_query.ini file with the format ChunkNQueryM (where N is the c...
void x_ValidateQueryInfoForChunk(const BlastQueryInfo *reference, const BlastQueryInfo *test, size_t chunk_num)
Validate the query info structure generated (test) against the expected one (reference) (N....
void x_TestCContextTranslator(TGiLengthVector &gi_length, size_t chunk_size, size_t num_chunks, blast::EProgram program, vector< vector< int > > &starting_chunks, vector< vector< int > > &absolute_contexts, vector< vector< size_t > > *context_offsets, ENa_strand strand, vector< ENa_strand > *query_strands=NULL)
void x_ValidateQueriesPerChunkAssignment(const CSplitQueryBlk &sqb, const vector< vector< size_t > > &queries_per_chunk)
Validates the query sequences (by index) assigned to all the chunks. This compares the data calculated...
void QuerySplitter_BlastnSingleQueryMultiChunk(const string &kTestName, ENa_strand strand)
void x_ParseConfigLine(const string &input, vector< T > &retval)
Tokenizes a string containing comma-separated values into a vector of values.
void x_PrepareBlastQueryStructures(TIntId gis[], EProgram program, BLAST_SequenceBlk **seq_blk, BlastQueryInfo **qinfo, ENa_strand *strand=NULL)
Populate a BLAST_SequenceBlk and BlastQueryInfo structures out of an array of GIs.
void QuerySplitter_BlastxMultiQueryMultiChunk(const string &kTestName, ENa_strand strand, vector< ENa_strand > *query_strands=NULL)
bool x_CmpSequenceData(const Uint1 *global, const Uint1 *chunk, size_t len)
Auxiliary function that compares bytes of sequence data to validate the context offset corrections.
void x_ComputeQueryChunkBounds(TChunkRange &chunk_range, size_t chunk_size, size_t concatenated_query_length, size_t overlap)
Incrementally compute the query chunk bounds.
void x_ReadSplitQueryInfoForTest(const string &kTestName, EBlastProgramType program, vector< BlastQueryInfo * > &retval)
Reads data to populate multiple BlastQueryInfo structures.
void x_ValidateQuerySeqLocsPerChunk(CRef< CQuerySplitter > splitter, const CQuerySplitter::TSplitQueryVector &split_query_vector)
Compare the query data (start, stop, strand) for each chunk computed by the splitter vs.
void x_ValidateContextOffsetsPerChunkAssignment(const CSplitQueryBlk &sqb, const vector< vector< size_t > > &contexts_offsets_per_chunk)
Validates the context offsets assigned to all the chunks.
void QuerySplitter_BlastnMultiQueryMultiChunk(const string &kTestName, ENa_strand strand, vector< ENa_strand > *query_strands=NULL)
static CTestObjMgr & Instance()
Collection of masked regions for a single query sequence.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
static const int chunk_size
static bool is_valid(const char *num, int type, CONV_RESULT *cr)
std::ofstream out("events_result.xml")
main entry point for tests
#define test(a, b, c, d, e)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
size_t SplitQuery_GetOverlapChunkSize(EBlastProgramType program)
Size of the region that overlaps in between each query chunk.
void SetStrandOption(objects::ENa_strand s)
size_t GetChunkSize() const
Returns the number of bases/residues that make up a query chunk.
TChunkRange GetChunkBounds(size_t chunk_num) const
Get the boundaries of a chunk in the concatenated query.
SSplitQueryBlk * GetCStruct() const
Returns the C structure managed by objects of this class.
vector< CRef< CBlastQueryVector > > TSplitQueryVector
Definition of a vector of CBlastQueryVectors, each element corresponds to a query chunk.
bool IsQuerySplit() const
Determines whether the query sequence(s) are split or not.
size_t GetNumQueriesForChunk(size_t chunk_num) const
Get the number of queries in a given chunk.
size_t GetNumChunks() const
Retrieve the number of chunks.
int GetAbsoluteContext(size_t chunk_num, Int4 context_in_chunk) const
Get the context number in the absolute (i.e.
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
virtual BLAST_SequenceBlk * GetSequenceBlk()=0
Accessor for the BLAST_SequenceBlk structure.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
Uint4 m_NumChunks
Number of chunks, if this is 1, no splitting occurs.
EBlastProgramType EProgramToEBlastProgramType(EProgram p)
Convert EProgram to EBlastProgramType.
size_t SplitQuery_GetChunkSize(EProgram program)
Returns the optimal chunk size for a given task.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
virtual BlastQueryInfo * GetQueryInfo()=0
Accessor for the BlastQueryInfo structure.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
CRef< IQueryFactory > GetQueryFactoryForChunk(Uint4 chunk_num)
Returns a IQueryFactory suitable to be executed by a BLAST search class.
virtual size_t GetNumQueries()=0
Get the number of queries.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
vector< size_t > GetQueryIndices(size_t chunk_num) const
Get the indices of the queries contained in a given chunk.
CRef< CSplitQueryBlk > Split()
Split the query sequence(s)
int GetStartingChunk(size_t curr_chunk, Int4 context_in_chunk) const
Get the chunk number where context_in_chunk starts (i.e.
vector< int > GetQueryContexts(size_t chunk_num) const
Get the contexts of the queries contained in a given chunk.
Uint4 SplitQuery_CalculateNumChunks(EBlastProgramType program, size_t *chunk_size, size_t concatenated_query_length, size_t num_queries)
Calculate the number of chunks that a query will be split into based upon query length,...
size_t GetSumOfSequenceLengths()
Compute the sum of all the sequence's lengths.
Uint4 GetNumberOfChunks() const
Returns the number of chunks the query/queries will be split into.
CRef< CSplitQueryBlk > m_SplitBlk
Split query block structure.
vector< size_t > GetContextOffsets(size_t chunk_num) const
Get the context offsets (corrections) of the queries contained in a given chunk.
TSplitQueryVector m_SplitQueriesInChunk
Vector of split queries.
#define GI_FROM(T, value)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void SetPacked_int(TPacked_int &v)
ENa_strand GetStrand(void) const
Get the location's strand.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
TRange GetTotalRange(void) const
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
Uint4 TValue
Type of the generated integer value and/or the seed value.
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
position_type GetLength(void) const
TThisType & SetFrom(position_type from)
bool NotEmpty(void) const
TThisType & SetToOpen(position_type toOpen)
position_type GetToOpen(void) const
position_type GetFrom(void) const
static TThisType GetEmpty(void)
TThisType & SetLength(position_type length)
int TFlags
Binary OR of "EFlags".
virtual bool GetBool(const string &section, const string &name, bool default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get boolean value of specified parameter name.
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
virtual int GetInt(const string &section, const string &name, int default_value, TFlags flags=0, EErrAction err_action=eThrow) const
Get integer value of specified parameter name.
bool Empty(TFlags flags=fAllLayers) const
Verify if Registry is empty.
@ fTruncate
Leading, trailing blanks can be truncated.
@ fNoOverride
Cannot change existing value.
@ fTransient
Transient – not saved by default.
@ fNotJustCore
Include auxiliary subregistries.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
@ fConvErr_NoThrow
Do not throw an exception on error.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
ENa_strand
strand of nucleic acid
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
bool IsInt(void) const
Check if variant Int is selected.
@ eNa_strand_both
in forward orientation
@ e_Gi
GenInfo Integrated Database.
unsigned int
A callback function used to compare two keys in a database.
Main class to perform a BLAST search on the local machine.
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
Magic spell ;-) needed for some weird compilers... very empiric.
const GenericPointer< typename T::ValueType > T2 value
#define UINT4_MAX
largest number represented by unsigned int.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Int2 SplitQueryBlk_GetQueryIndicesForChunk(const SSplitQueryBlk *squery_blk, Uint4 chunk_num, Uint4 **query_indices)
Retrieve an array of query indices for the requested chunk.
Int2 SplitQueryBlk_GetQueryContextsForChunk(const SSplitQueryBlk *squery_blk, Uint4 chunk_num, Int4 **query_contexts, Uint4 *num_query_contexts)
Retrieve an array of query contexts for the requested chunk.
Int2 SplitQueryBlk_AddQueryToChunk(SSplitQueryBlk *squery_blk, Uint4 query_index, Uint4 chunk_num)
Add a query index to a given chunk.
Int2 SplitQueryBlk_GetNumQueriesForChunk(const SSplitQueryBlk *squery_blk, Uint4 chunk_num, size_t *num_queries)
Retrieve the number of queries that correspond to chunk number chunk_num.
Int2 SplitQueryBlk_AddContextToChunk(SSplitQueryBlk *squery_blk, Int4 ctx_index, Uint4 chunk_num)
Add a query context index to a given chunk.
const Int4 kInvalidContext
Value to represent an invalid context.
const Int2 kBadParameter
Invalid parameter used in a function call.
Declares CQuerySplitter, a class to split the query sequence(s)
Auxiliary functions and classes to assist in query splitting.
static void s_ConvertToBlastQueries(const TGiLengthVector &gi_length, TSeqLocVector &retval, size_t *tot_length=NULL, vector< ENa_strand > *strands=NULL, const TSeqLocInfoVector *masks=NULL)
Convert a vector of GIs with its lengths into a TSeqLocVector.
static void s_CalculateMaxLength(BlastQueryInfo *query_info)
Calculate and assign the maximum length field in the BlastQueryInfo structure.
vector< TGiLenPair > TGiLengthVector
Vector containing pairs of gis and their length.
pair< TIntId, size_t > TGiLenPair
Pair for gis and their length (in that order)
BOOST_AUTO_TEST_CASE(SplitQueriesIn1Chunk)
vector< vector< Uint4 > > TSplitQueryChunkMap
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Structure to hold a sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int8 eff_searchsp
Effective search space for this context.
Int4 query_index
Index of query (same for all frames)
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
Int4 last_context
Index of the last element of the context array.
Uint4 max_length
Length of the longest among the concatenated queries.
Structure to represent a single sequence to be fed to BLAST.
Utility stuff for more convenient using of Boost.Test library.