00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <ncbi_pch.hpp>
00034 #include <corelib/test_boost.hpp>
00035
00036 #include <corelib/ncbi_limits.hpp>
00037
00038
00039 #include <serial/serial.hpp>
00040 #include <serial/objistr.hpp>
00041
00042 #include <util/random_gen.hpp>
00043 #include <util/math/matrix.hpp>
00044
00045
00046 #include <objects/general/Object_id.hpp>
00047 #include <objects/seqloc/Seq_id.hpp>
00048 #include <objects/seqalign/Score.hpp>
00049 #include <objects/seqalign/Dense_seg.hpp>
00050 #include <objects/seqalign/Seq_align.hpp>
00051 #include <objects/seqalign/Seq_align_set.hpp>
00052
00053
00054 #include <objects/scoremat/Pssm.hpp>
00055 #include <objects/scoremat/PssmParameters.hpp>
00056 #include <objects/scoremat/PssmWithParameters.hpp>
00057 #include <objects/scoremat/PssmFinalData.hpp>
00058 #include <objects/scoremat/PssmIntermediateData.hpp>
00059 #include <objects/scoremat/FormatRpsDbParameters.hpp>
00060
00061
00062 #include <algo/blast/api/blast_aux.hpp>
00063 #include <algo/blast/api/bl2seq.hpp>
00064 #include <algo/blast/api/pssm_engine.hpp>
00065 #include <algo/blast/api/pssm_input.hpp>
00066 #include <algo/blast/api/psi_pssm_input.hpp>
00067 #include <algo/blast/core/blast_setup.h>
00068 #include <blast_objmgr_priv.hpp>
00069 #include <blast_psi_priv.h>
00070 #include <blast_posit.h>
00071 #include "psiblast_aux_priv.hpp"
00072
00073 #include <algo/blast/api/blast_exception.hpp>
00074 #include <algo/blast/api/pssm_engine.hpp>
00075
00076
00077 #include "blast_test_util.hpp"
00078
00079
00080
00081 #include <objmgr/util/sequence.hpp>
00082
00083
00084 #include <util/tables/raw_scoremat.h>
00085
00086
00087 #include <objects/seq/seqport_util.hpp>
00088
00089 #include "test_objmgr.hpp"
00090
00091 using namespace std;
00092 using namespace ncbi;
00093 using namespace ncbi::objects;
00094 using namespace ncbi::blast;
00095
00096
00097
00098 class CPssmCreateTestFixture {
00099 public:
00100
00101
00102 static string
00103 x_ErrorCodeToString(int error_code)
00104 {
00105 return CPssmEngine::x_ErrorCodeToString(error_code);
00106 }
00107
00108
00109
00110
00111
00112 static void
00113 x_GetSubjectSequence(const objects::CDense_seg& ds, objects::CScope& scope,
00114 string& sequence_data)
00115 {
00116 return CPsiBlastInputData::x_GetSubjectSequence(ds, scope, sequence_data);
00117 }
00118
00119
00120
00121
00122
00123
00124
00125
00126 static unsigned char*
00127 x_GuardProteinQuery(const unsigned char* query,
00128 unsigned int query_length)
00129 {
00130 return CPssmEngine::x_GuardProteinQuery(query, query_length);
00131 }
00132
00133
00134
00135
00136
00137 static unsigned int
00138 GetNumAlignedSequences(const CPsiBlastInputData& input)
00139 {
00140 return input.GetNumAlignedSequences();
00141 }
00142 };
00143
00144
00145
00146
00147
00148
00149 class CPssmInputFlankingGaps : public IPssmInputData
00150 {
00151 public:
00152 CPssmInputFlankingGaps() {
00153 const unsigned int kQuerySize = 10;
00154 const unsigned int kNumSeqs = 2;
00155 const unsigned char kQuery[] = { 3, 9, 14, 20, 6, 23, 1, 7, 16, 5 };
00156
00157 m_query = new unsigned char[kQuerySize];
00158 memcpy((void*) m_query, (void*) kQuery, kQuerySize*sizeof(*kQuery));
00159
00160 m_dim.query_length = kQuerySize;
00161 m_dim.num_seqs = kNumSeqs;
00162
00163 m_msa = PSIMsaNew(&m_dim);
00164
00165 for (unsigned int i = 0; i < m_dim.query_length; i++) {
00166 for (unsigned int j = 0; j < m_dim.num_seqs+1; j++) {
00167 m_msa->data[j][i].letter = kQuery[i];
00168 m_msa->data[j][i].is_aligned = true;
00169 }
00170 }
00171
00172
00173 m_msa->data[1][0].letter =
00174 m_msa->data[2][0].letter =
00175 m_msa->data[2][m_dim.query_length-1].letter =
00176 AMINOACID_TO_NCBISTDAA[(int)'-'];
00177
00178 m_options = NULL;
00179 PSIBlastOptionsNew(&m_options);
00180
00181
00182 memset((void*) &m_diag_request, 0, sizeof(m_diag_request));
00183 }
00184
00185 virtual ~CPssmInputFlankingGaps() {
00186 delete [] m_query;
00187 m_msa = PSIMsaFree(m_msa);
00188 m_options = PSIBlastOptionsFree(m_options);
00189 }
00190
00191 void Process() {}
00192 unsigned char* GetQuery() { return m_query; }
00193 unsigned int GetQueryLength() { return m_dim.query_length; }
00194 PSIMsa* GetData() { return m_msa; }
00195 const PSIBlastOptions* GetOptions() { return m_options; }
00196 const PSIDiagnosticsRequest* GetDiagnosticsRequest() {
00197 return &m_diag_request;
00198 }
00199
00200 protected:
00201
00202 unsigned char* m_query;
00203 PSIMsaDimensions m_dim;
00204 PSIMsa* m_msa;
00205 PSIBlastOptions* m_options;
00206 PSIDiagnosticsRequest m_diag_request;
00207 };
00208
00209
00210
00211 class CPssmInputGapsInQuery : public CPssmInputFlankingGaps
00212 {
00213 public:
00214 CPssmInputGapsInQuery() {
00215
00216 for (unsigned int i = 0; i < m_dim.query_length; i++) {
00217 for (unsigned int j = 0; j < m_dim.num_seqs+1; j++) {
00218 m_msa->data[j][i].letter = m_query[i];
00219 m_msa->data[j][i].is_aligned = true;
00220 }
00221 }
00222
00223
00224 CRandom r(time(NULL));
00225 int gap_position = r.GetRand(0, GetQueryLength() - 1);
00226 m_query[gap_position] = AMINOACID_TO_NCBISTDAA[(int)'-'];
00227 m_msa->data[0][gap_position].letter = m_query[gap_position];
00228 }
00229 };
00230
00231
00232
00233 class CPssmInputQueryLength0 : public CPssmInputFlankingGaps
00234 {
00235 public:
00236 unsigned int GetQueryLength() { return 0; }
00237 };
00238
00239
00240 class CNullPssmInput: public IPssmInputData
00241 {
00242 public:
00243 void Process() {}
00244 unsigned char* GetQuery() { return NULL; }
00245 unsigned int GetQueryLength() { return 0; }
00246 PSIMsa* GetData() { return NULL; }
00247 const PSIBlastOptions* GetOptions() { return NULL; }
00248 const char* GetMatrixName() { return NULL; }
00249 const PSIDiagnosticsRequest* GetDiagnosticsRequest() { return NULL; }
00250 };
00251
00252 class CPssmInputUnsupportedMatrix : public CPssmInputFlankingGaps
00253 {
00254 public:
00255 const char* GetMatrixName() { return "TEST"; }
00256 };
00257
00258
00259
00260
00261 class CPssmInputTestData : public CPssmInputFlankingGaps
00262 {
00263 public:
00264
00265
00266 typedef pair<TSeqPos, TSeqPos> TAlignedSegment;
00267
00268
00269
00270 enum EAlignmentType {
00271 eSelfHit,
00272 eDuplicateHit,
00273
00274 eNearIdenticalHits,
00275
00276 eMsaHasUnalignedRegion,
00277
00278
00279
00280
00281
00282
00283 eQueryAlignedWithInternalGaps,
00284
00285
00286
00287
00288
00289
00290 eHenikoffsPaper
00291 };
00292
00293 CPssmInputTestData(EAlignmentType type, PSIBlastOptions* opts = NULL) {
00294
00295
00296 if (m_query) {
00297 delete [] m_query;
00298 m_query = NULL;
00299 m_msa = PSIMsaFree(m_msa);
00300 m_options = PSIBlastOptionsFree(m_options);
00301 }
00302
00303 PSIBlastOptionsNew(&m_options);
00304 if (opts) {
00305 memcpy((void*)&m_options, (void*)opts, sizeof(PSIBlastOptions));
00306 }
00307
00308 switch (type) {
00309 case eSelfHit:
00310 SetupSelfHit();
00311 break;
00312
00313 case eDuplicateHit:
00314 SetupDuplicateHit();
00315 break;
00316
00317 case eNearIdenticalHits:
00318 SetupNearIdenticalHits();
00319 break;
00320
00321 case eMsaHasUnalignedRegion:
00322 SetupMsaHasUnalignedRegion();
00323 break;
00324
00325 case eQueryAlignedWithInternalGaps:
00326 SetupQueryAlignedWithInternalGaps();
00327 break;
00328
00329 case eHenikoffsPaper:
00330 SetupHenikoffsPositionBasedSequenceWeights();
00331 break;
00332
00333 default:
00334 throw std::logic_error("Unsupported alignment test data");
00335 }
00336 }
00337
00338 ~CPssmInputTestData() {
00339 delete [] m_query;
00340 m_query = NULL;
00341 m_msa = PSIMsaFree(m_msa);
00342 m_options = PSIBlastOptionsFree(m_options);
00343 }
00344
00345
00346 private:
00347
00348 static const size_t kQueryLength = 232;
00349 static const Uint1 kQuery[kQueryLength];
00350
00351 void SetupSelfHit(void) {
00352 const Uint4 kNumAlignedSeqs = 1;
00353
00354 m_dim.query_length = kQueryLength;
00355 m_dim.num_seqs = kNumAlignedSeqs;
00356 m_msa = PSIMsaNew(&m_dim);
00357 m_query = new unsigned char[kQueryLength];
00358
00359
00360 for (unsigned int i = 0; i < kQueryLength; i++) {
00361 for (unsigned int seq_idx = 0; seq_idx < kNumAlignedSeqs + 1;
00362 seq_idx++) {
00363 m_msa->data[seq_idx][i].letter = m_query[i] = kQuery[i];
00364 m_msa->data[seq_idx][i].is_aligned = true;
00365 }
00366 }
00367 }
00368
00369 Uint1 FindNonIdenticalHighScoringResidue
00370 (Uint1 res, const SNCBIPackedScoreMatrix* score_matrix)
00371 {
00372 BOOST_REQUIRE(score_matrix);
00373 Uint1 retval = AMINOACID_TO_NCBISTDAA[(int)'-'];
00374 int max_score = BLAST_SCORE_MIN;
00375
00376 for (size_t i = 0; i < BLASTAA_SIZE; i++) {
00377
00378 if (i == res) {
00379 continue;
00380 }
00381 int score =
00382 static_cast<int>(NCBISM_GetScore(score_matrix, res, i));
00383 if (score > max_score) {
00384 max_score = score;
00385 retval = i;
00386 }
00387 }
00388 BOOST_REQUIRE(retval != AMINOACID_TO_NCBISTDAA[(int)'-']);
00389 return retval;
00390 }
00391
00392 void SetupMsaHasUnalignedRegion(void) {
00393 const Uint4 kNumAlignedSeqs = 2;
00394
00395 m_dim.query_length = kQueryLength;
00396 m_dim.num_seqs = kNumAlignedSeqs;
00397 m_msa = PSIMsaNew(&m_dim);
00398 m_query = new unsigned char[kQueryLength];
00399
00400
00401 for (unsigned int i = 0; i < kQueryLength; i++) {
00402 m_msa->data[0][i].letter = m_query[i] = kQuery[i];
00403 m_msa->data[0][i].is_aligned = true;
00404 }
00405
00406 const SNCBIPackedScoreMatrix* score_matrix = &NCBISM_Blosum62;
00407
00408
00409
00410
00411
00412 const TAlignedSegment kFirstAlignment(0, 100);
00413 for (unsigned int i = kFirstAlignment.first;
00414 i < kFirstAlignment.second; i++) {
00415 m_msa->data[1][i].letter =
00416 FindNonIdenticalHighScoringResidue(kQuery[i], score_matrix);
00417 m_msa->data[1][i].is_aligned = true;
00418 }
00419
00420
00421
00422
00423
00424 const TAlignedSegment kSecondAlignment(200, kQueryLength);
00425 for (unsigned int i = kSecondAlignment.first;
00426 i < kSecondAlignment.second; i++) {
00427 m_msa->data[2][i].letter =
00428 FindNonIdenticalHighScoringResidue(kQuery[i], score_matrix);
00429 m_msa->data[2][i].is_aligned = true;
00430 }
00431 }
00432
00433 void SetupQueryAlignedWithInternalGaps() {
00434 using std::pair;
00435 using std::string;
00436 using std::vector;
00437
00438 const Uint4 kNumAlignedSeqs = 1;
00439 const size_t kLocalQueryLength = 87;
00440
00441 m_dim.query_length = kLocalQueryLength;
00442 m_dim.num_seqs = kNumAlignedSeqs;
00443 m_msa = PSIMsaNew(&m_dim);
00444 m_query = new unsigned char[kLocalQueryLength];
00445
00446 string query_seq("MFKVYGYDSNIHKCGPCDNAKRLLTVKKQPFEFINIM");
00447 query_seq += string("PEKGVFDDEKIAELLTKLGRDTQIGLTMPQVFAPDGSHIGGFD");
00448 query_seq += string("QLREYFK");
00449
00450 typedef pair<TAlignedSegment, string> TAlignedSequence;
00451 vector<TAlignedSequence> aligned_sequence;
00452
00453 TAlignedSequence region(make_pair(make_pair(0U, 8U),
00454 string("KVVVFIKP")));
00455 aligned_sequence.push_back(region);
00456
00457 region = make_pair(make_pair(12U, 39U),
00458 string("TCPFCRKTQELLSQLPFLLEFVDITAT"));
00459 aligned_sequence.push_back(region);
00460
00461 region = make_pair(make_pair(41U, 57U), string("SDTNEIQDYLQQLTGA"));
00462 aligned_sequence.push_back(region);
00463
00464 region = make_pair(make_pair(62U, 71U), string("RTVPRVFIG"));
00465 aligned_sequence.push_back(region);
00466
00467 region = make_pair(make_pair(72U, 87U), string("KECIGGCTDLESMHK"));
00468 aligned_sequence.push_back(region);
00469
00470
00471 const Uint1 kGapResidue = AMINOACID_TO_NCBISTDAA[(int)'-'];
00472 for (Uint4 i = 0; i < kLocalQueryLength; i++) {
00473 m_query[i] = CSeqportUtil::GetIndex(CSeq_data::e_Ncbistdaa,
00474 query_seq.substr(i, 1));
00475 m_msa->data[0][i].letter = m_query[i];
00476 m_msa->data[0][i].is_aligned = true;
00477
00478
00479 m_msa->data[1][i].letter = kGapResidue;
00480 m_msa->data[1][i].is_aligned = true;
00481 }
00482
00483
00484 ITERATE(vector<TAlignedSequence>, itr, aligned_sequence) {
00485 TAlignedSegment loc = itr->first;
00486 string sequence_data = itr->second;
00487
00488 for (Uint4 i = loc.first, j = 0; i < loc.second; i++, j++) {
00489 m_msa->data[1][i].letter =
00490 CSeqportUtil::GetIndex(CSeq_data::e_Ncbistdaa,
00491 sequence_data.substr(j, 1));
00492 }
00493 }
00494 }
00495
00496 void SetupHenikoffsPositionBasedSequenceWeights(void) {
00497 const Uint4 kNumAlignedSeqs = 3;
00498 const Uint1 kQuerySequence[5] = { 7, 22, 19, 7, 17 };
00499 const Uint1 kSeq1[5] = { 7, 6, 4, 7, 6 };
00500 const Uint1 kSeq2[5] = { 7, 22, 4, 7, 6 };
00501 const Uint1 kSeq3[5] = { 7, 22, 15, 7, 7 };
00502
00503 m_dim.query_length = sizeof(kQuery);
00504 m_dim.num_seqs = kNumAlignedSeqs;
00505 m_msa = PSIMsaNew(&m_dim);
00506 m_query = new unsigned char[sizeof(kQuerySequence)];
00507
00508
00509 for (Uint4 s = 0; s < kNumAlignedSeqs; s++) {
00510
00511 const Uint1* sequence = NULL;
00512 switch (s) {
00513 case 0: sequence = kSeq1; break;
00514 case 1: sequence = kSeq2; break;
00515 case 2: sequence = kSeq3; break;
00516 default: abort();
00517 }
00518
00519 for (Uint4 i = 0; i < sizeof(kQuerySequence); i++) {
00520 m_query[i] = kQuerySequence[i];
00521 m_msa->data[s][i].letter = sequence[i];
00522 m_msa->data[s][i].is_aligned = true;
00523 }
00524 }
00525 }
00526
00527 void SetupDuplicateHit(void) {
00528 const Uint4 kNumAlignedSeqs = 2;
00529
00530
00531
00532 const Uint1 kGi_129296_[388] = {
00533 12, 4, 17, 9, 17, 19, 18, 13, 1, 10, 6, 3, 6, 4, 19,
00534 6, 13, 5, 12, 10, 19, 8, 8, 19, 13, 5, 13, 9, 11, 22,
00535 3, 14, 11, 17, 9, 11, 18, 1, 11, 1, 12, 19, 22, 11, 7,
00536 1, 16, 7, 13, 18, 5, 17, 15, 12, 10, 10, 19, 11, 8, 6,
00537 4, 17, 9, 18, 7, 1, 7, 17, 18, 18, 4, 17, 15, 3, 7,
00538 17, 17, 5, 22, 19, 8, 13, 11, 6, 10, 5, 11, 11, 17, 5,
00539 9, 18, 16, 14, 13, 1, 18, 22, 17, 11, 5, 9, 1, 4, 10,
00540 11, 22, 19, 4, 10, 18, 6, 17, 19, 11, 14, 5, 22, 11, 17,
00541 3, 1, 16, 10, 6, 22, 18, 7, 7, 19, 5, 5, 19, 13, 6,
00542 10, 18, 1, 1, 5, 5, 1, 16, 15, 11, 9, 13, 17, 20, 19,
00543 5, 10, 5, 18, 13, 7, 15, 9, 10, 4, 11, 11, 19, 17, 17,
00544 17, 9, 4, 6, 7, 18, 18, 12, 19, 6, 9, 13, 18, 9, 22,
00545 6, 10, 7, 9, 20, 10, 9, 1, 6, 13, 18, 5, 4, 18, 16,
00546 5, 12, 14, 6, 17, 12, 18, 10, 5, 5, 17, 10, 14, 19, 15,
00547 12, 12, 3, 12, 13, 13, 17, 6, 13, 19, 1, 18, 11, 14, 1,
00548 5, 10, 12, 10, 9, 11, 5, 11, 14, 22, 1, 17, 7, 4, 11,
00549 17, 12, 11, 19, 11, 11, 14, 4, 5, 19, 17, 7, 11, 5, 16,
00550 9, 5, 10, 18, 9, 13, 6, 4, 10, 11, 16, 5, 20, 18, 17,
00551 18, 13, 1, 12, 1, 10, 10, 17, 12, 10, 19, 22, 11, 14, 16,
00552 12, 10, 9, 5, 5, 10, 22, 13, 11, 18, 17, 9, 11, 12, 1,
00553 11, 7, 12, 18, 4, 11, 6, 17, 16, 17, 1, 13, 11, 18, 7,
00554 9, 17, 17, 19, 4, 13, 11, 12, 9, 17, 4, 1, 19, 8, 7,
00555 19, 6, 12, 5, 19, 13, 5, 5, 7, 18, 5, 1, 18, 7, 17,
00556 18, 7, 1, 9, 7, 13, 9, 10, 8, 17, 11, 5, 11, 5, 5,
00557 6, 16, 1, 4, 8, 14, 6, 11, 6, 6, 9, 16, 22, 13, 14,
00558 18, 13, 1, 9, 11, 6, 6, 7, 16, 22, 20, 17, 14};
00559
00560 m_dim.query_length = kQueryLength;
00561 m_dim.num_seqs = kNumAlignedSeqs;
00562 m_msa = PSIMsaNew(&m_dim);
00563 m_query = new unsigned char[kQueryLength];
00564
00565 for (unsigned int i = 0; i < kQueryLength; i++) {
00566 m_msa->data[kQueryIndex][i].letter = m_query[i] = kQuery[i];
00567 m_msa->data[kQueryIndex][i].is_aligned = true;
00568 }
00569
00570 for (unsigned int i = 1; i < kNumAlignedSeqs + 1; i++) {
00571 for (unsigned int j = 0; j < kQueryLength; j++) {
00572 m_msa->data[i][j].letter = kGi_129296_[j];
00573 m_msa->data[i][j].is_aligned = true;
00574 }
00575 }
00576 }
00577
00578 void SetupNearIdenticalHits(void) {
00579 SetupDuplicateHit();
00580
00581 const Uint4 kHitIndex = 2;
00582 const Uint4 kNumIdenticalResidues = (Uint4) (GetQueryLength() *
00583 (kPSINearIdentical + 0.01));
00584
00585 for (Uint4 i = kNumIdenticalResidues; i < GetQueryLength(); i++) {
00586 Uint1& residue = m_msa->data[kHitIndex][i].letter;
00587 residue = (residue + 1) % BLASTAA_SIZE;
00588 BOOST_REQUIRE(residue > 0 && residue < BLASTAA_SIZE);
00589 }
00590 }
00591 };
00592
// Out-of-class definitions of the static data members declared in
// CPssmInputTestData: the query length and the 232 residue codes that the
// Setup* methods copy into the query row of the alignment.
// NOTE(review): the values look like NCBIstdaa residue codes - confirm.
00593 const size_t CPssmInputTestData::kQueryLength;
00594 const Uint1 CPssmInputTestData::kQuery[CPssmInputTestData::kQueryLength] = {
00595 15, 9, 10, 4, 11, 11, 19, 17, 17, 17, 18, 4, 11, 4, 18,
00596 18, 11, 19, 11, 19, 13, 1, 9, 22, 6, 10, 7, 12, 20, 10,
00597 18, 1, 6, 13, 1, 5, 4, 18, 16, 5, 12, 14, 6, 8, 19,
00598 18, 10, 15, 5, 17, 10, 14, 19, 15, 12, 12, 3, 12, 13, 13,
00599 17, 6, 13, 19, 1, 18, 11, 14, 1, 5, 10, 12, 10, 9, 11,
00600 5, 11, 14, 6, 1, 17, 7, 4, 11, 17, 12, 11, 19, 11, 11,
00601 14, 4, 5, 19, 17, 4, 11, 5, 16, 9, 5, 10, 18, 9, 13,
00602 6, 5, 10, 11, 18, 5, 20, 18, 13, 14, 13, 18, 12, 5, 10,
00603 16, 16, 19, 10, 19, 22, 11, 14, 15, 12, 10, 9, 5, 5, 10,
00604 22, 13, 11, 18, 17, 19, 11, 12, 1, 11, 7, 12, 18, 4, 11,
00605 6, 9, 14, 17, 1, 13, 11, 18, 7, 9, 17, 17, 1, 5, 17,
00606 11, 10, 9, 17, 15, 1, 19, 8, 7, 1, 6, 12, 5, 11, 17,
00607 5, 4, 7, 9, 5, 12, 1, 7, 17, 18, 7, 19, 9, 5, 4,
00608 9, 10, 8, 17, 14, 5, 17, 5, 15, 6, 16, 1, 4, 8, 14,
00609 6, 11, 6, 11, 9, 10, 8, 13, 14, 18, 13, 18, 9, 19, 22,
00610 6, 7, 16, 22, 20, 17, 14};
00611
00612
00613
00614 BEGIN_NCBI_SCOPE
00615
00616
00617 template <>
00618 struct Deleter<_PSIAlignedBlock> {
00619 static void Delete(_PSIAlignedBlock* p)
00620 { _PSIAlignedBlockFree(p); }
00621 };
00622
00623 template <>
00624 struct Deleter<_PSISequenceWeights> {
00625 static void Delete(_PSISequenceWeights* p)
00626 { _PSISequenceWeightsFree(p); }
00627 };
00628
00629 template <>
00630 struct Deleter<_PSIInternalPssmData> {
00631 static void Delete(_PSIInternalPssmData* p)
00632 { _PSIInternalPssmDataFree(p); }
00633 };
00634
00635 template <>
00636 struct Deleter<_PSIMsa> {
00637 static void Delete(_PSIMsa* p)
00638 { _PSIMsaFree(p); }
00639 };
00640
00641 template <>
00642 struct Deleter<_PSIPackedMsa> {
00643 static void Delete(_PSIPackedMsa* p)
00644 { _PSIPackedMsaFree(p); }
00645 };
00646
00647 END_NCBI_SCOPE
00648
00649 BOOST_FIXTURE_TEST_SUITE(pssmcreate, CPssmCreateTestFixture)
00650
00651
00652
00653
00654
00655 static BlastScoreBlk* InitializeBlastScoreBlk(const unsigned char* query,
00656 Uint4 query_size) {
00657 const EBlastProgramType kProgramType = eBlastTypeBlastp;
00658 const double kScaleFactor = 1.0;
00659 Blast_Message* errors = NULL;
00660 short status = 0;
00661
00662
00663 CBlastScoringOptions opts;
00664 status = BlastScoringOptionsNew(kProgramType, &opts);
00665 BOOST_REQUIRE(status == 0);
00666
00667
00668 CBLAST_SequenceBlk query_blk;
00669 status = BlastSeqBlkNew(&query_blk);
00670 BOOST_REQUIRE(status == 0);
00671 status = BlastSeqBlkSetSequence(query_blk, query, query_size);
00672 BOOST_REQUIRE(status == 0);
00673
00674 query_blk->sequence_allocated = FALSE;
00675 query_blk->sequence_start_allocated = FALSE;
00676
00677 const Uint1 kNullByte = GetSentinelByte(eBlastEncodingProtein);
00678 BOOST_REQUIRE(query_blk.Get() != NULL);
00679 BOOST_REQUIRE(query_blk->sequence[0] != kNullByte);
00680 BOOST_REQUIRE(query_blk->sequence[query_blk->length - 1] != kNullByte);
00681 BOOST_REQUIRE(query_blk->sequence_start[0] == kNullByte);
00682 BOOST_REQUIRE(query_blk->sequence_start[query_blk->length + 1] ==
00683 kNullByte);
00684
00685
00686 CBlastQueryInfo query_info(TestUtil::CreateProtQueryInfo(query_size));
00687
00688 BlastScoreBlk* retval = NULL;
00689 status = BlastSetup_ScoreBlkInit(query_blk,
00690 query_info,
00691 opts,
00692 kProgramType,
00693 &retval,
00694 kScaleFactor,
00695 &errors,
00696 &BlastFindMatrixPath);
00697 if (status) {
00698 throw runtime_error(errors->message);
00699 }
00700 BOOST_REQUIRE(retval->kbp_ideal);
00701
00702
00703
00704 return retval;
00705 }
00706
00707
00708 BOOST_AUTO_TEST_CASE(testFullPssmEngineRunWithDiagnosticsRequest) {
00709
00710 const string seqalign("data/nr-129295.new.asn.short");
00711 auto_ptr<CObjectIStream> in
00712 (CObjectIStream::Open(seqalign, eSerial_AsnText));
00713
00714 CRef<CSeq_align_set> sas(new CSeq_align_set());
00715 *in >> *sas;
00716
00717 CSeq_id qid("gi|129295"), sid("gi|6");
00718 auto_ptr<SSeqLoc> q(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00719 SBlastSequence seq(GetSequence(*q->seqloc, eBlastEncodingProtein, q->scope));
00720
00721 CPSIBlastOptions opts;
00722 PSIBlastOptionsNew(&opts);
00723
00724 PSIDiagnosticsRequest request;
00725 memset((void*) &request, 0, sizeof(request));
00726 request.information_content = false;
00727 request.residue_frequencies = true;
00728 request.weighted_residue_frequencies = true;
00729 request.frequency_ratios = true;
00730 request.gapless_column_weights = false;
00731
00732 CRef<IPssmInputData> pssm_strategy(
00733 new CPsiBlastInputData(seq.data.get()+1,
00734 seq.length-2,
00735 sas, q->scope,
00736 *opts,
00737 "BLOSUM80",
00738 &request));
00739 CRef<CPssmEngine> pssm_engine(new CPssmEngine(pssm_strategy));
00740 CRef<CPssmWithParameters> pssm = pssm_engine->Run();
00741
00742 const size_t kNumElements =
00743 pssm_strategy->GetQueryLength() * BLASTAA_SIZE;
00744
00745 const CPssmIntermediateData::TResFreqsPerPos& res_freqs =
00746 pssm->GetPssm().GetIntermediateData().GetResFreqsPerPos();
00747 BOOST_REQUIRE_EQUAL(kNumElements, res_freqs.size());
00748
00749 const CPssmIntermediateData::TWeightedResFreqsPerPos& wres_freqs =
00750 pssm->GetPssm().GetIntermediateData().GetWeightedResFreqsPerPos();
00751 BOOST_REQUIRE_EQUAL(kNumElements, wres_freqs.size());
00752
00753 const CPssmIntermediateData::TFreqRatios& freq_ratios =
00754 pssm->GetPssm().GetIntermediateData().GetFreqRatios();
00755 BOOST_REQUIRE_EQUAL(kNumElements, freq_ratios.size());
00756
00757
00758
00759
00760 pssm_strategy.Reset();
00761 pssm_engine.Reset();
00762 memset((void*) &request, 0, sizeof(request));
00763 request.information_content = true;
00764
00765 pssm_strategy.Reset(
00766 new CPsiBlastInputData(seq.data.get(),
00767 seq.length,
00768 sas, q->scope,
00769 *opts,
00770 "BLOSUM80",
00771 &request));
00772 pssm_engine.Reset(new CPssmEngine(pssm_strategy));
00773 BOOST_CHECK_THROW(pssm_engine->Run(), CBlastException);
00774
00775 pssm_strategy.Reset();
00776 pssm_engine.Reset();
00777
00778
00779 memset((void*) &request, 0, sizeof(request));
00780 request.gapless_column_weights = true;
00781
00782 pssm_strategy.Reset(
00783 new CPsiBlastInputData(seq.data.get(),
00784 seq.length,
00785 sas, q->scope,
00786 *opts,
00787 "BLOSUM80",
00788 &request));
00789 pssm_engine.Reset(new CPssmEngine(pssm_strategy));
00790 BOOST_CHECK_THROW(pssm_engine->Run(), CBlastException);
00791 }
00792
00793
00794
// Aligns gi|129295 (query) against gi|6 (subject) with CBl2Seq, feeds the
// resulting Seq-align-set to CPsiBlastInputData and checks that row 1 of the
// multiple sequence alignment it produces matches an expectation built
// independently from the dense-segs of the same Seq-align-set.
00795 BOOST_AUTO_TEST_CASE(testSeqAlignToPsiBlastMultipleSequenceAlignment) {
00796
00797
00798 CSeq_id qid("gi|129295"), sid("gi|6");
00799 auto_ptr<SSeqLoc> q(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00800 auto_ptr<SSeqLoc> s(CTestObjMgr::Instance().CreateSSeqLoc(sid));
00801 CBl2Seq blaster(*q, *s, eBlastp);
00802 TSeqAlignVector sasv = blaster.Run();
00803 BOOST_REQUIRE(sasv.size() != 0);
00804
00805 CPSIBlastOptions opts;
00806 PSIBlastOptionsNew(&opts);
00807
// Override two of the default PSI-BLAST options for this run.
00808 opts->inclusion_ethresh = BLAST_EXPECT_VALUE;
00809 opts->use_best_alignment = FALSE;
00810
00811
00812 SBlastSequence seq(GetSequence(*q->seqloc, eBlastEncodingProtein, q->scope));
00813
00814 try {
// The sequence buffer is passed offset by one and two bytes shorter.
00815 auto_ptr<CPsiBlastInputData> pssm_input(
00816 new CPsiBlastInputData(seq.data.get()+1,
00817 seq.length-2,
00818 sasv[0], q->scope, *opts));
00819
// pssm_engine is constructed but not used again in this test.
00820 CPssmEngine pssm_engine(pssm_input.get());
00821 pssm_input->Process();
00822
// Number of MSA rows: the aligned sequences plus the query row.
00823 TSeqPos nseqs = CPssmCreateTestFixture::GetNumAlignedSequences(*pssm_input) + 1;
00824
00825
00826
00827
00828
00829
00830
// Only row 1 (the first aligned sequence) is compared below.
00831 TSeqPos seq_index = 1;
00832 const PSIMsaCell kNullPSIMsaCell = {
00833 (unsigned char) 0,
00834 false
00835 };
00836
00837
00838
// Expected cells, one per query position; positions that no segment covers
// keep the null cell (letter 0, not aligned).
00839 vector<PSIMsaCell> aligned_pos(pssm_input->GetQueryLength());
00840 fill(aligned_pos.begin(), aligned_pos.end(), kNullPSIMsaCell);
00841
00842
00843
00844
// Walk every segment of every HSP. All HSPs write into the same
// aligned_pos vector, so a later HSP overwrites positions set by an earlier
// one.
00845 ITERATE(CSeq_align_set::Tdata, hsp, sasv[0]->Get()) {
00846 const CDense_seg& ds = (*hsp)->GetSegs().GetDenseg();
00847 string subj;
00848 CPssmCreateTestFixture::x_GetSubjectSequence(ds,
00849 *s->scope, subj);
00850 const vector<TSignedSeqPos>& starts = ds.GetStarts();
00851 const vector<TSeqPos>& lengths = ds.GetLens();
00852
00853 for (int i = 0; i < ds.GetNumseg(); i++) {
// starts holds GetDim() entries per segment: entry 0 is read as the query
// start and entry 1 as the subject start of segment i.
00854 TSignedSeqPos q_index = starts[i*ds.GetDim()];
00855 TSignedSeqPos s_index = starts[i*ds.GetDim()+1];
00856
// A start of -1 marks a gap in that row for this segment. The macro stays
// defined for the rest of the translation unit.
00857 #define GAP_IN_ALIGNMENT -1
00858 if (s_index == (int)GAP_IN_ALIGNMENT) {
// Gap in the subject: the covered query positions get the gap residue and
// are still flagged as aligned.
00859 for (TSeqPos pos = 0; pos < lengths[i]; pos++) {
00860 PSIMsaCell& pd = aligned_pos[q_index++];
00861 pd.letter = AMINOACID_TO_NCBISTDAA[(Uint1)'-'];
00862 pd.is_aligned = true;
00863 }
00864 } else if (q_index == (int)GAP_IN_ALIGNMENT) {
// Gap in the query: no query position to record. s_index is re-read from
// starts on the next iteration, so this increment has no lasting effect.
00865 s_index += lengths[i];
00866 continue;
00867 } else {
// Both rows present: rebase the subject index on starts[1] (the subject
// start of the first segment) and copy the subject residues.
// NOTE(review): presumably subj begins at the first aligned subject
// position - confirm against x_GetSubjectSequence.
00868 s_index = (i == 0) ? 0 : (s_index - starts[1]);
00869 for (TSeqPos pos = 0; pos < lengths[i]; pos++) {
00870 PSIMsaCell& pd = aligned_pos[q_index++];
00871 pd.letter = subj[s_index++];
00872 pd.is_aligned = true;
00873 }
00874 }
00875 }
00876 }
00877
00878 stringstream ss;
00879
// Compare the expectation with row seq_index of the MSA, position by
// position.
00880 for (TSeqPos i = 0; i < pssm_input->GetQueryLength(); i++) {
00881 BOOST_REQUIRE(seq_index < nseqs);
00882 const PSIMsaCell& pos_desc =
00883 pssm_input->GetData()->data[seq_index][i];
00884 ss.str("");
00885 ss << "Sequence " << seq_index << ", position " << i
00886 << " differ";
00887 BOOST_REQUIRE_MESSAGE(aligned_pos[i].letter == pos_desc.letter &&
00888 aligned_pos[i].is_aligned == pos_desc.is_aligned, ss.str());
00889 }
00890
// seq_index is not read again after this increment.
00891 seq_index++;
// Any exception is reported on stderr and turned into a test failure.
// NOTE(review): a failing BOOST_REQUIRE inside the try block may itself be
// intercepted by these handlers - confirm how Boost.Test signals it.
00892 } catch (const exception& e) {
00893 cerr << e.what() << endl;
00894 BOOST_REQUIRE(false);
00895 } catch (...) {
00896 cerr << "Unknown exception" << endl;
00897 BOOST_REQUIRE(false);
00898 }
00899 }
00900
00901
00902
00903
00904 BOOST_AUTO_TEST_CASE(testPurgeSequencesWithNull) {
00905 int rv = _PSIPurgeBiasedSegments(NULL);
00906 BOOST_REQUIRE_EQUAL(PSIERR_BADPARAM, rv);
00907 }
00908
00909 BOOST_AUTO_TEST_CASE(testPurgeSelfHit) {
00910 auto_ptr<IPssmInputData> pssm_input
00911 (new CPssmInputTestData(CPssmInputTestData::eSelfHit));
00912 pssm_input->Process();
00913 AutoPtr<_PSIPackedMsa> msa(_PSIPackedMsaNew(pssm_input->GetData()));
00914 int rv = _PSIPurgeBiasedSegments(msa.get());
00915 BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);
00916 const Uint4 kSelfHitIndex = 1;
00917 BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex]);
00918 BOOST_REQUIRE_EQUAL(false, (bool) msa->use_sequence[kSelfHitIndex]);
00919 }
00920
00921 BOOST_AUTO_TEST_CASE(testPurgeDuplicateHit) {
00922 auto_ptr<IPssmInputData> pssm_input
00923 (new CPssmInputTestData(CPssmInputTestData::eDuplicateHit));
00924 pssm_input->Process();
00925 AutoPtr<_PSIPackedMsa> msa(_PSIPackedMsaNew(pssm_input->GetData()));
00926 int rv = _PSIPurgeBiasedSegments(msa.get());
00927 BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);
00928 const Uint4 kDuplicateHitIndex = 2;
00929 BOOST_REQUIRE_EQUAL(false,
00930 (bool) msa->use_sequence[kDuplicateHitIndex]);
00931 BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex]);
00932 BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex + 1]);
00933 }
00934
00935 BOOST_AUTO_TEST_CASE(testPurgeNearIdenticalHits) {
00936 auto_ptr<IPssmInputData> pssm_input
00937 (new CPssmInputTestData(CPssmInputTestData::eNearIdenticalHits));
00938 pssm_input->Process();
00939 AutoPtr<_PSIPackedMsa> msa(_PSIPackedMsaNew(pssm_input->GetData()));
00940 int rv = _PSIPurgeBiasedSegments(msa.get());
00941 BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);
00942 const Uint4 kRemovedHitIndex = 2;
00943 BOOST_REQUIRE_EQUAL(false,
00944 (bool) msa->use_sequence[kRemovedHitIndex]);
00945 BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex]);
00946 BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex + 1]);
00947 }
00948
00949 BOOST_AUTO_TEST_CASE(testQueryAlignedWithInternalGaps) {
00950 auto_ptr<IPssmInputData> pssm_input
00951 (new CPssmInputTestData
00952 (CPssmInputTestData::eQueryAlignedWithInternalGaps));
00953 BOOST_REQUIRE_EQUAL(string("BLOSUM62"),
00954 string(pssm_input->GetMatrixName()));
00955 CPssmEngine pssm_engine(pssm_input.get());
00956 CRef<CPssmWithParameters> pssm_asn = pssm_engine.Run();
00957
00958 auto_ptr< CNcbiMatrix<int> > pssm
00959 (CScorematPssmConverter::GetScores(*pssm_asn));
00960
00961
00962
00963
00964
00965
00966 const SNCBIPackedScoreMatrix* score_matrix = &NCBISM_Blosum62;
00967 const Uint1 kGapResidue = AMINOACID_TO_NCBISTDAA[(int)'-'];
00968 stringstream ss;
00969 BOOST_REQUIRE_EQUAL((size_t)pssm_asn->GetPssm().GetNumColumns(),
00970 (size_t)pssm->GetCols());
00971 BOOST_REQUIRE_EQUAL((size_t)pssm_asn->GetPssm().GetNumRows(),
00972 (size_t)pssm->GetRows());
00973 for (int i = 0; i < pssm_asn->GetPssm().GetNumColumns(); i++) {
00974 for (int j = 0; j < pssm_asn->GetPssm().GetNumRows(); j++) {
00975
00976
00977 if (j == kGapResidue) {
00978 ss.str("");
00979 ss << "Position " << i << " residue "
00980 << TestUtil::GetResidue(j) << " differ on PSSM";
00981 BOOST_REQUIRE_MESSAGE(BLAST_SCORE_MIN == (*pssm)(j, i), ss.str());
00982 } else {
00983 int score =
00984 (int)NCBISM_GetScore(score_matrix,
00985 pssm_input->GetQuery()[i], j);
00986
00987 ss.str("");
00988 ss << "Position " << i << " residue "
00989 << TestUtil::GetResidue(j) << " differ on PSSM: "
00990 << "expected=" << NStr::IntToString(score)
00991 << " actual=" << NStr::IntToString((*pssm)(j, i));
00992 BOOST_REQUIRE_MESSAGE (score-1 <= (*pssm)(j, i) || (*pssm)(j, i) <= score+1, ss.str());
00993 }
00994 }
00995 }
00996 }
00997
00998 BOOST_AUTO_TEST_CASE(testMultiSeqAlignmentHasRegionsUnalignedToQuery) {
00999 auto_ptr<IPssmInputData> pssm_input
01000 (new
01001 CPssmInputTestData(CPssmInputTestData::eMsaHasUnalignedRegion));
01002 pssm_input->Process();
01003 BOOST_REQUIRE_EQUAL(string("BLOSUM62"),
01004 string(pssm_input->GetMatrixName()));
01005
01006
01007
01008 AutoPtr<_PSIPackedMsa> packed_msa
01009 (_PSIPackedMsaNew(pssm_input->GetData()));
01010 int rv = _PSIPurgeBiasedSegments(packed_msa.get());
01011 BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);
01012 BOOST_REQUIRE_EQUAL(true,
01013 (bool) packed_msa->use_sequence[kQueryIndex]);
01014 BOOST_REQUIRE_EQUAL(true, (bool) packed_msa->use_sequence[1]);
01015 BOOST_REQUIRE_EQUAL(true, (bool) packed_msa->use_sequence[2]);
01016
01017 AutoPtr<_PSIMsa> msa(_PSIMsaNew(packed_msa.get(), BLASTAA_SIZE));
01018
01019 CPSIBlastOptions opts;
01020 PSIBlastOptionsNew(&opts);
01021 AutoPtr<_PSIAlignedBlock> aligned_blocks(
01022 _PSIAlignedBlockNew(pssm_input->GetQueryLength()));
01023 rv = _PSIComputeAlignmentBlocks(msa.get(), aligned_blocks.get());
01024 stringstream ss;
01025 ss << "_PSIComputeAlignmentBlocks failed: "
01026 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01027 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01028
01029
01030 vector<CPssmInputTestData::TAlignedSegment> aligned_regions;
01031 aligned_regions.push_back(make_pair(0U, 99U));
01032 aligned_regions.push_back(make_pair(200U,
01033 pssm_input->GetQueryLength()-1));
01034
01035 for (vector<CPssmInputTestData::TAlignedSegment>::const_iterator i =
01036 aligned_regions.begin();
01037 i != aligned_regions.end(); ++i) {
01038 for (TSeqPos pos = i->first; pos < i->second; pos++) {
01039 ss.str("");
01040 ss << "Alignment extents differ at position "
01041 << NStr::IntToString(pos);
01042 BOOST_REQUIRE_MESSAGE((int)i->first == (int)aligned_blocks->pos_extnt[pos].left, ss.str());
01043 BOOST_REQUIRE_MESSAGE((int)i->second == (int)aligned_blocks->pos_extnt[pos].right, ss.str());
01044 BOOST_REQUIRE_MESSAGE( (int)(i->second - i->first + 1) == (int)aligned_blocks->size[pos], ss.str());
01045 }
01046 }
01047
01048
01049 const CPssmInputTestData::TAlignedSegment kUnalignedRange(100, 200);
01050 for (size_t i = kUnalignedRange.first;
01051 i < kUnalignedRange.second; i++) {
01052 ss.str("");
01053 ss << "Alignment extents differ at position "
01054 << NStr::IntToString(i);
01055 BOOST_REQUIRE_MESSAGE((int)-1 == (int)aligned_blocks->pos_extnt[i].left, ss.str());
01056 BOOST_REQUIRE_MESSAGE( (int)pssm_input->GetQueryLength() == (int)aligned_blocks->pos_extnt[i].right, ss.str());
01057 BOOST_REQUIRE_MESSAGE(
01058 (int)(aligned_blocks->pos_extnt[i].right - aligned_blocks->pos_extnt[i].left + 1) == (int)aligned_blocks->size[i],
01059 ss.str());
01060 }
01061
01062
01063 blast::TAutoUint1Ptr query_with_sentinels
01064 (CPssmCreateTestFixture::x_GuardProteinQuery(pssm_input->GetQuery(),
01065 pssm_input->GetQueryLength()));;
01066 CBlastScoreBlk sbp;
01067 sbp.Reset
01068 (InitializeBlastScoreBlk
01069 (query_with_sentinels.get(), pssm_input->GetQueryLength()));
01070 AutoPtr<_PSISequenceWeights> seq_weights(
01071 _PSISequenceWeightsNew(msa->dimensions,
01072 sbp));
01073 rv = _PSIComputeSequenceWeights(msa.get(), aligned_blocks.get(),
01074 opts->nsg_compatibility_mode,
01075 seq_weights.get());
01076 ss.str("");
01077 ss << "_PSIComputeSequenceWeights failed: "
01078 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01079 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01080
01081
01082
01083 BOOST_REQUIRE_EQUAL(false, (bool)opts->nsg_compatibility_mode);
01084 const Uint1 kXResidue = AMINOACID_TO_NCBISTDAA[(int)'X'];
01085 for (vector<CPssmInputTestData::TAlignedSegment>::const_iterator i =
01086 aligned_regions.begin();
01087 i != aligned_regions.end(); ++i) {
01088 for (TSeqPos pos = i->first; pos < i->second; pos++) {
01089 double total_sequence_weights_for_column = 0.0;
01090 for (size_t res = 0; res < msa->alphabet_size; res++) {
01091 if (res == kXResidue) continue;
01092 total_sequence_weights_for_column +=
01093 seq_weights->match_weights[pos][res];
01094 }
01095 BOOST_REQUIRE(total_sequence_weights_for_column > 0.99 &&
01096 total_sequence_weights_for_column < 1.01);
01097 }
01098 }
01099
01100 for (size_t pos = kUnalignedRange.first;
01101 pos < kUnalignedRange.second; pos++) {
01102 double total_sequence_weights_for_column = 0.0;
01103 for (size_t res = 0; res < msa->alphabet_size; res++) {
01104 if (res == kXResidue) continue;
01105 total_sequence_weights_for_column +=
01106 seq_weights->match_weights[pos][res];
01107 }
01108 BOOST_REQUIRE(total_sequence_weights_for_column == 0.0);
01109 }
01110
01111
01112 AutoPtr<_PSIInternalPssmData> internal_pssm(
01113 _PSIInternalPssmDataNew(pssm_input->GetQueryLength(),
01114 sbp->alphabet_size));
01115 rv = _PSIComputeFreqRatios(msa.get(), seq_weights.get(), sbp,
01116 aligned_blocks.get(), opts->pseudo_count,
01117 opts->nsg_compatibility_mode,
01118 internal_pssm.get());
01119 ss.str("");
01120 ss << "_PSIComputeResidueFrequencies failed: "
01121 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01122 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01123
01124
01125 rv = _PSIConvertFreqRatiosToPSSM(internal_pssm.get(),
01126 msa->query,
01127 sbp,
01128 seq_weights->std_prob);
01129 ss.str("");
01130 ss << "_PSIConvertResidueFreqsToPSSM failed: "
01131 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01132 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01133
01134
01135 rv = _PSIScaleMatrix(msa->query,
01136 seq_weights->std_prob,
01137 internal_pssm.get(),
01138 sbp);
01139 ss.str("");
01140 ss << "_PSIScaleMatrix failed: "
01141 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01142 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01143
01144
01145
01146
01147
01148 const SNCBIPackedScoreMatrix* score_matrix = &NCBISM_Blosum62;
01149 const Uint1 kGapResidue = AMINOACID_TO_NCBISTDAA[(int)'-'];
01150 for (Uint4 i = 0; i < pssm_input->GetQueryLength(); i++) {
01151 for (Uint4 j = 0; j < (Uint4) sbp->alphabet_size; j++) {
01152
01153
01154 if (j == kGapResidue) {
01155 ss.str("");
01156 ss << "Position " << i << " residue "
01157 << TestUtil::GetResidue(j) << " differ on PSSM";
01158 BOOST_REQUIRE_MESSAGE(BLAST_SCORE_MIN == internal_pssm->pssm[i][j], ss.str());
01159 } else {
01160 int score =
01161 (int)NCBISM_GetScore(score_matrix, msa->query[i], j);
01162
01163 ss.str("");
01164 ss << "Position " << i << " residue "
01165 << TestUtil::GetResidue(j) << " differ on PSSM: "
01166 << "expected=" << NStr::IntToString(score)
01167 << " actual=" <<
01168 NStr::IntToString(internal_pssm->pssm[i][j]);
01169 BOOST_REQUIRE_MESSAGE(score-1 <= internal_pssm->pssm[i][j] || internal_pssm->pssm[i][j] <= score+1, ss.str());
01170 }
01171 }
01172 }
01173 }
01174
01175
01176
01177
01178 BOOST_AUTO_TEST_CASE(testQueryIsOnlyAlignedSequenceInMsa) {
01179 auto_ptr<IPssmInputData> pssm_input
01180 (new CPssmInputTestData(CPssmInputTestData::eSelfHit));
01181 pssm_input->Process();
01182 BOOST_REQUIRE_EQUAL(string("BLOSUM62"),
01183 string(pssm_input->GetMatrixName()));
01184
01185
01186
01187 AutoPtr<_PSIPackedMsa> packed_msa
01188 (_PSIPackedMsaNew(pssm_input->GetData()));
01189 int rv = _PSIPurgeBiasedSegments(packed_msa.get());
01190 BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);
01191 const Uint4 kSelfHitIndex = 1;
01192 BOOST_REQUIRE_EQUAL(true,
01193 (bool) packed_msa->use_sequence[kQueryIndex]);
01194 BOOST_REQUIRE_EQUAL(false,
01195 (bool) packed_msa->use_sequence[kSelfHitIndex]);
01196
01197 AutoPtr<_PSIMsa> msa(_PSIMsaNew(packed_msa.get(), BLASTAA_SIZE));
01198
01199 CPSIBlastOptions opts;
01200 PSIBlastOptionsNew(&opts);
01201 AutoPtr<_PSIAlignedBlock> aligned_blocks(
01202 _PSIAlignedBlockNew(pssm_input->GetQueryLength()));
01203 rv = _PSIComputeAlignmentBlocks(msa.get(), aligned_blocks.get());
01204 stringstream ss;
01205 ss << "_PSIComputeAlignmentBlocks failed: "
01206 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01207 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01208
01209 for (size_t i = 0; i < pssm_input->GetQueryLength(); i++) {
01210 BOOST_REQUIRE_EQUAL((int)-1,
01211 (int)aligned_blocks->pos_extnt[i].left);
01212 BOOST_REQUIRE_EQUAL((int)pssm_input->GetQueryLength(),
01213 (int)aligned_blocks->pos_extnt[i].right);
01214 BOOST_REQUIRE_EQUAL((int)pssm_input->GetQueryLength() + 2,
01215 (int)aligned_blocks->size[i]);
01216 }
01217
01218
01219 blast::TAutoUint1Ptr query_with_sentinels
01220 (CPssmCreateTestFixture::x_GuardProteinQuery(pssm_input->GetQuery(),
01221 pssm_input->GetQueryLength()));;
01222 CBlastScoreBlk sbp;
01223 sbp.Reset
01224 (InitializeBlastScoreBlk
01225 (query_with_sentinels.get(), pssm_input->GetQueryLength()));
01226 AutoPtr<_PSISequenceWeights> seq_weights(
01227 _PSISequenceWeightsNew(msa->dimensions,
01228 sbp));
01229 rv = _PSIComputeSequenceWeights(msa.get(), aligned_blocks.get(),
01230
01231
01232 TRUE,
01233 seq_weights.get());
01234 ss.str("");
01235 ss << "_PSIComputeSequenceWeights failed: "
01236 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01237 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01238
01239
01240 AutoPtr<_PSIInternalPssmData> internal_pssm(
01241 _PSIInternalPssmDataNew(pssm_input->GetQueryLength(),
01242 sbp->alphabet_size));
01243 rv = _PSIComputeFreqRatios(msa.get(), seq_weights.get(), sbp,
01244 aligned_blocks.get(), opts->pseudo_count,
01245 opts->nsg_compatibility_mode,
01246 internal_pssm.get());
01247 ss.str("");
01248 ss << "_PSIComputeResidueFrequencies failed: "
01249 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01250 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01251
01252
01253 rv = _PSIConvertFreqRatiosToPSSM(internal_pssm.get(),
01254 msa->query,
01255 sbp,
01256 seq_weights->std_prob);
01257 ss.str("");
01258 ss << "_PSIConvertResidueFreqsToPSSM failed: "
01259 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01260 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01261
01262
01263 rv = _PSIScaleMatrix(msa->query,
01264 seq_weights->std_prob,
01265 internal_pssm.get(),
01266 sbp);
01267 ss.str("");
01268 ss << "_PSIScaleMatrix failed: "
01269 << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01270 BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01271
01272
01273
01274
01275
01276 const SNCBIPackedScoreMatrix* score_matrix = &NCBISM_Blosum62;
01277 const Uint1 kGapResidue = AMINOACID_TO_NCBISTDAA[(int)'-'];
01278 for (Uint4 i = 0; i < pssm_input->GetQueryLength(); i++) {
01279 for (Uint4 j = 0; j < (Uint4) sbp->alphabet_size; j++) {
01280
01281
01282 if (j == kGapResidue) {
01283 ss.str("");
01284 ss << "Position " << i << " residue "
01285 << TestUtil::GetResidue(j) << " differ on PSSM";
01286 BOOST_REQUIRE_MESSAGE(BLAST_SCORE_MIN == internal_pssm->pssm[i][j], ss.str());
01287 } else {
01288 int score =
01289 (int)NCBISM_GetScore(score_matrix, msa->query[i], j);
01290
01291 ss.str("");
01292 ss << "Position " << i << " residue "
01293 << TestUtil::GetResidue(j) << " differ on PSSM: "
01294 << "expected=" << NStr::IntToString(score)
01295 << " actual=" <<
01296 NStr::IntToString(internal_pssm->pssm[i][j]);
01297 BOOST_REQUIRE_MESSAGE(score-1 <= internal_pssm->pssm[i][j] || internal_pssm->pssm[i][j] <= score+1, ss.str());
01298 }
01299 }
01300 }
01301 }
01302
01303 BOOST_AUTO_TEST_CASE(testRejectFlankingGaps) {
01304 auto_ptr<IPssmInputData> bad_pssm_data(new CPssmInputFlankingGaps());
01305 CPssmEngine pssm_engine(bad_pssm_data.get());
01306 BOOST_REQUIRE_THROW(pssm_engine.Run(), CBlastException);
01307 }
01308
01309 BOOST_AUTO_TEST_CASE(testRejectGapInQuery) {
01310 auto_ptr<IPssmInputData> bad_pssm_data(new CPssmInputGapsInQuery());
01311 CPssmEngine pssm_engine(bad_pssm_data.get());
01312 BOOST_REQUIRE_THROW(pssm_engine.Run(), CBlastException);
01313 }
01314
01315 BOOST_AUTO_TEST_CASE(testRejectQueryLength0) {
01316 auto_ptr<IPssmInputData> bad_pssm_data(new CPssmInputQueryLength0());
01317 BOOST_REQUIRE_THROW(CPssmEngine pssm_engine(bad_pssm_data.get()), CPssmEngineException);
01318 }
01319
01320 BOOST_AUTO_TEST_CASE(testRejectNullPssmInputData) {
01321 IPssmInputData* null_ptr = NULL;
01322 BOOST_REQUIRE_THROW(CPssmEngine pssm_engine(null_ptr), CPssmEngineException);
01323 }
01324
01325 BOOST_AUTO_TEST_CASE(testRejectNullsReturnedByPssmInput) {
01326 auto_ptr<IPssmInputData> bad_pssm_data(new CNullPssmInput());
01327 BOOST_REQUIRE_THROW(CPssmEngine pssm_engine(bad_pssm_data.get()), CBlastException);
01328 }
01329
01330 BOOST_AUTO_TEST_CASE(testRejectUnsupportedMatrix) {
01331 auto_ptr<IPssmInputData> bad_pssm_data(new
01332 CPssmInputUnsupportedMatrix());
01333 BOOST_REQUIRE_THROW(CPssmEngine pssm_engine(bad_pssm_data.get()), CBlastException);
01334 }
01335
01336
01337
01338 BOOST_AUTO_TEST_CASE(testPsiAlignmentDataCreation_TooMuchMemory) {
01339 const PSIMsaDimensions kDimensions = { ncbi::numeric_limits<int>::max(),
01340 ncbi::numeric_limits<int>::max() };
01341 PSIMsa* msa = PSIMsaNew(&kDimensions);
01342 BOOST_REQUIRE(msa == NULL);
01343 }
01344
01345
01346 BOOST_AUTO_TEST_SUITE_END()
01347
01348
01349
01350
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360
01361
01362
01363
01364
01365
01366
01367
01368
01369
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390
01391
01392
01393
01394
01395
01396
01397
01398
01399
01400
01401
01402
01403
01404
01405
01406
01407
01408
01409
01410
01411
01412
01413
01414
01415
01416
01417
01418
01419
01420
01421
01422
01423
01424
01425
01426
01427
01428
01429
01430
01431
01432
01433
01434
01435
01436
01437
01438
01439
01440
01441
01442
01443
01444
01445
01446
01447
01448
01449
01450
01451
01452
01453
01454
01455
01456
01457
01458
01459
01460
01461
01462
01463
01464
01465
01466
01467
01468
01469
01470
01471
01472
01473
01474
01475
01476
01477
01478
01479
01480
01481
01482
01483
01484
01485
01486
01487
01488
01489
01490
01491
01492
01493
01494
01495
01496
01497
01498
01499
01500
01501
01502
01503
01504
01505
01506
01507
01508
01509
01510
01511
01512
01513
01514
01515
01516
01517
01518
01519
01520
01521
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531
01532
01533
01534
01535
01536
01537
01538
01539
01540
01541
01542
01543
01544
01545
01546
01547
01548
01549
01550
01551
01552
01553
01554
01555
01556
01557
01558
01559
01560
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574
01575
01576
01577
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590
01591
01592
01593
01594
01595
01596
01597
01598
01599
01600
01601
01602
01603
01604
01605
01606
01607
01608
01609
01610
01611
01612
01613
01614
01615