src/algo/blast/unit_tests/api/pssmcreate_unit_test.cpp

Go to the documentation of this file.
00001 /*  $Id: pssmcreate_unit_test.cpp 171622 2009-09-25 15:08:10Z avagyanv $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Author:  Christiam Camacho
00027  *
00028  */
00029 
00030 /** @file pssmcreate_unit_test.cpp
00031  * Unit test module for creation of PSSMs from multiple sequence alignments.
00032  */
00033 #include <ncbi_pch.hpp>
00034 #include <corelib/test_boost.hpp>
00035 
00036 #include <corelib/ncbi_limits.hpp>
00037 
00038 // Serial library includes
00039 #include <serial/serial.hpp>
00040 #include <serial/objistr.hpp>
00041 
00042 #include <util/random_gen.hpp>
00043 #include <util/math/matrix.hpp>
00044 
00045 // Object includes
00046 #include <objects/general/Object_id.hpp>
00047 #include <objects/seqloc/Seq_id.hpp>
00048 #include <objects/seqalign/Score.hpp>
00049 #include <objects/seqalign/Dense_seg.hpp>
00050 #include <objects/seqalign/Seq_align.hpp>
00051 #include <objects/seqalign/Seq_align_set.hpp>
00052 
00053 // ASN.1 definition for PSSM (scoremat)
00054 #include <objects/scoremat/Pssm.hpp>
00055 #include <objects/scoremat/PssmParameters.hpp>
00056 #include <objects/scoremat/PssmWithParameters.hpp>
00057 #include <objects/scoremat/PssmFinalData.hpp>
00058 #include <objects/scoremat/PssmIntermediateData.hpp>
00059 #include <objects/scoremat/FormatRpsDbParameters.hpp>
00060 
00061 // BLAST includes
00062 #include <algo/blast/api/blast_aux.hpp>
00063 #include <algo/blast/api/bl2seq.hpp>
00064 #include <algo/blast/api/pssm_engine.hpp>
00065 #include <algo/blast/api/pssm_input.hpp>
00066 #include <algo/blast/api/psi_pssm_input.hpp>
00067 #include <algo/blast/core/blast_setup.h>
00068 #include <blast_objmgr_priv.hpp>
00069 #include <blast_psi_priv.h>
00070 #include <blast_posit.h>
00071 #include "psiblast_aux_priv.hpp"    // for CScorematPssmConverter
00072 
00073 #include <algo/blast/api/blast_exception.hpp>
00074 #include <algo/blast/api/pssm_engine.hpp>
00075 
00076 // Unit test auxiliary includes
00077 #include "blast_test_util.hpp"
00078 // #include "psiblast_test_util.hpp"
00079 
00080 // Object manager includes
00081 #include <objmgr/util/sequence.hpp>
00082 
00083 // Standard scoring matrices
00084 #include <util/tables/raw_scoremat.h>
00085 
00086 // Seqport utilities
00087 #include <objects/seq/seqport_util.hpp>
00088 
00089 #include "test_objmgr.hpp"
00090 
00091 using namespace std;
00092 using namespace ncbi;
00093 using namespace ncbi::objects;
00094 using namespace ncbi::blast;
00095 
00096 /// This class exists merely to call private methods in CPsiBlastInputData
00097 /// and CPssmEngine.  Both clases declare this one as a friend.
00098 class CPssmCreateTestFixture {
00099 public:
00100    /// Gets error strings from a CPssmEngine private method
00101    /// @param error_code input integer code
00102    static string 
00103    x_ErrorCodeToString(int error_code)
00104    {
00105          return CPssmEngine::x_ErrorCodeToString(error_code); 
00106    }
00107 
00108    /// Gets Subject sequence from a CPsiBlastInputData private method
00109    /// @param ds alignment input
00110    /// @param scope allos fetching of sequence
00111    /// @param sequence_data return value for sequence.
00112    static void
00113    x_GetSubjectSequence(const objects::CDense_seg& ds, objects::CScope& scope,
00114                           string& sequence_data)
00115    {
00116          return CPsiBlastInputData::x_GetSubjectSequence(ds, scope, sequence_data);
00117    }
00118 
00119    /// Accesses CPssmEngine private method.
00120    /// Copies query sequence and adds protein sentinel bytes at the beginning
00121    /// and at the end of the sequence.
00122    /// @param query sequence to copy [in]
00123    /// @param query_length length of the sequence above [in]
00124    /// @throws CBlastException if does not have enough memory
00125    /// @return copy of query guarded by protein sentinel bytes
00126    static unsigned char*
00127    x_GuardProteinQuery(const unsigned char* query,
00128                                  unsigned int query_length)
00129    {
00130          return CPssmEngine::x_GuardProteinQuery(query, query_length);
00131    }
00132 
00133    /// Accesses CPsiBlastInputData private method.  
00134    /// Returns the number of sequences that make up the multiple sequence
00135    /// alignment
00136    /// @param input Instance of CPsiBlastInputData
00137    static unsigned int 
00138    GetNumAlignedSequences(const CPsiBlastInputData& input)
00139    {
00140          return input.GetNumAlignedSequences();
00141    }
00142 };
00143 
00144 
00145 /******************************* copied from blast_psi_cxx.cpp **************/
00146 
00147 /// Mock object for the PSSM input data which returns multiple sequence
00148 /// alignment data which has flanking gaps
00149 class CPssmInputFlankingGaps : public IPssmInputData
00150 {
00151 public:
00152     CPssmInputFlankingGaps() {
00153         const unsigned int  kQuerySize = 10;
00154         const unsigned int  kNumSeqs = 2;
00155         const unsigned char kQuery[] = { 3, 9, 14, 20, 6, 23, 1, 7, 16, 5 };
00156 
00157         m_query = new unsigned char[kQuerySize];
00158         memcpy((void*) m_query, (void*) kQuery, kQuerySize*sizeof(*kQuery));
00159 
00160         m_dim.query_length = kQuerySize;
00161         m_dim.num_seqs = kNumSeqs;
00162 
00163         m_msa = PSIMsaNew(&m_dim);
00164 
00165         for (unsigned int i = 0; i < m_dim.query_length; i++) {
00166             for (unsigned int j = 0; j < m_dim.num_seqs+1; j++) {
00167                 m_msa->data[j][i].letter = kQuery[i];
00168                 m_msa->data[j][i].is_aligned = true;
00169             }
00170         }
00171 
00172         // Add the flanking gaps
00173         m_msa->data[1][0].letter = 
00174             m_msa->data[2][0].letter = 
00175             m_msa->data[2][m_dim.query_length-1].letter = 
00176             AMINOACID_TO_NCBISTDAA[(int)'-'];
00177 
00178         m_options = NULL;
00179         PSIBlastOptionsNew(&m_options);
00180 
00181         // don't request any diagnostics data
00182         memset((void*) &m_diag_request, 0, sizeof(m_diag_request));
00183     }
00184 
00185     virtual ~CPssmInputFlankingGaps() {
00186         delete [] m_query;
00187         m_msa = PSIMsaFree(m_msa);
00188         m_options = PSIBlastOptionsFree(m_options);
00189     }
00190 
00191     void Process() {}
00192     unsigned char* GetQuery() { return m_query; }
00193     unsigned int GetQueryLength() { return m_dim.query_length; }
00194     PSIMsa* GetData() { return m_msa; }
00195     const PSIBlastOptions* GetOptions() { return m_options; }
00196     const PSIDiagnosticsRequest* GetDiagnosticsRequest() { 
00197         return &m_diag_request; 
00198     }
00199 
00200 protected:
00201 
00202     unsigned char*          m_query;
00203     PSIMsaDimensions        m_dim;
00204     PSIMsa*                 m_msa;
00205     PSIBlastOptions*        m_options;
00206     PSIDiagnosticsRequest   m_diag_request;
00207 };
00208 
00209 /// Mock object for the PSSM input data which returns a query sequence with a
00210 /// gap in it
00211 class CPssmInputGapsInQuery : public CPssmInputFlankingGaps
00212 {
00213 public:
00214     CPssmInputGapsInQuery() {
00215         // initialize multiple sequence alignment data with valid data
00216         for (unsigned int i = 0; i < m_dim.query_length; i++) {
00217             for (unsigned int j = 0; j < m_dim.num_seqs+1; j++) {
00218                 m_msa->data[j][i].letter = m_query[i];
00219                 m_msa->data[j][i].is_aligned = true;
00220             }
00221         }
00222 
00223         // Randomly assign a position in the query to contain a gap
00224         CRandom r(time(NULL));
00225         int gap_position = r.GetRand(0, GetQueryLength() - 1);
00226         m_query[gap_position] = AMINOACID_TO_NCBISTDAA[(int)'-'];
00227         m_msa->data[0][gap_position].letter = m_query[gap_position];
00228     }
00229 };
00230 
00231 /// Mock object for the PSSM input data which returns a query sequence with a
00232 /// gap in it
00233 class CPssmInputQueryLength0 : public CPssmInputFlankingGaps
00234 {
00235 public:
00236     unsigned int GetQueryLength() { return 0; }
00237 };
00238 
00239 /// Mock object for the PSSM input data which returns NULLs for all its methods
00240 class CNullPssmInput: public IPssmInputData
00241 {
00242 public:
00243     void Process() {}
00244     unsigned char* GetQuery() { return NULL; }
00245     unsigned int GetQueryLength() { return 0; }
00246     PSIMsa* GetData() { return NULL; }
00247     const PSIBlastOptions* GetOptions() { return NULL; }
00248     const char* GetMatrixName() { return NULL; }
00249     const PSIDiagnosticsRequest* GetDiagnosticsRequest() { return NULL; }
00250 };
00251 
00252 class CPssmInputUnsupportedMatrix : public CPssmInputFlankingGaps
00253 {
00254 public:
00255     const char* GetMatrixName() { return "TEST"; }
00256 };
00257 
00258 /// Mock object for the PSSM input data which can be configured to have
00259 /// different combinations of aligned sequences. Currently used to test the
00260 /// purging of biased sequences in multiple sequence alignment data
00261 class CPssmInputTestData : public CPssmInputFlankingGaps
00262 {
00263 public:
00264     // Convenience for defining an aligned segment/region in the multiple
00265     // sequence alignment
00266     typedef pair<TSeqPos, TSeqPos> TAlignedSegment;
00267 
00268     // Enumeration to specify the various data setups that can be created with
00269     // this class
00270     enum EAlignmentType {
00271         eSelfHit,           // Single pairwise alignment which is a self hit
00272         eDuplicateHit,      // 2 pairwise alignments where hits 1 and 2 are 
00273                             // identical
00274         eNearIdenticalHits, // 2 pairswise alignments where hits 1 and 2 are
00275                             // 94% identical
00276         eMsaHasUnalignedRegion, // multiple sequence alignment with 3 sequences
00277                             // (including the query) which contain a region
00278                             // where the query is unaligned to any other
00279                             // sequences, i.e.:
00280                             //   query: AAAAAAAAAABBBBBBBCCCCCCCCCCC
00281                             //   sbjct: DDDDDDDDDD------------------
00282                             //   sbjct: -----------------EEEEEEEEEEE
00283         eQueryAlignedWithInternalGaps, // multiple sequence alignment with 2
00284                             // sequences which contain regions where internal
00285                             // (as opposed to flanking) gaps are aligned to the
00286                             // query sequence, i.e.:
00287 // num_seqs: 1, query_length: 87                                                   
00288 // MFKVYGYDSNIHKCGPCDNAKRLLTVKKQPFEFINIMPEKGVFDDEKIAELLTKLGRDTQIGLTMPQVFAPDGSHIGGFDQLREYFK
00289 // KVVVFIKP----TCPFCRKTQELLSQLPFLLEFVDITAT--SDTNEIQDYLQQLTGA-----RTVPRVFIG-KECIGGCTDLESMHK
00290         eHenikoffsPaper
00291     };
00292 
00293     CPssmInputTestData(EAlignmentType type, PSIBlastOptions* opts = NULL) {
00294 
00295         // Clean up data allocated by parent class
00296         if (m_query) {
00297             delete [] m_query; 
00298             m_query = NULL;
00299             m_msa = PSIMsaFree(m_msa);
00300             m_options = PSIBlastOptionsFree(m_options);
00301         }
00302 
00303         PSIBlastOptionsNew(&m_options);
00304         if (opts) {
00305             memcpy((void*)&m_options, (void*)opts, sizeof(PSIBlastOptions));
00306         }
00307 
00308         switch (type) {
00309         case eSelfHit:
00310             SetupSelfHit();
00311             break;
00312 
00313         case eDuplicateHit:
00314             SetupDuplicateHit();
00315             break;
00316 
00317         case eNearIdenticalHits:
00318             SetupNearIdenticalHits();
00319             break;
00320 
00321         case eMsaHasUnalignedRegion:
00322             SetupMsaHasUnalignedRegion();
00323             break;
00324 
00325         case eQueryAlignedWithInternalGaps:
00326             SetupQueryAlignedWithInternalGaps();
00327             break;
00328 
00329         case eHenikoffsPaper:
00330             SetupHenikoffsPositionBasedSequenceWeights();
00331             break;
00332 
00333         default:
00334             throw std::logic_error("Unsupported alignment test data");
00335         }
00336     }
00337 
00338     ~CPssmInputTestData() {
00339         delete [] m_query;
00340         m_query = NULL;
00341         m_msa = PSIMsaFree(m_msa);
00342         m_options = PSIBlastOptionsFree(m_options);
00343     }
00344 
00345 
00346 private:
00347 // Gi 129295
00348 static const size_t kQueryLength = 232;
00349 static const Uint1 kQuery[kQueryLength];
00350 
00351      void SetupSelfHit(void) {
00352         const Uint4 kNumAlignedSeqs = 1; // does not include query
00353 
00354         m_dim.query_length = kQueryLength;
00355         m_dim.num_seqs = kNumAlignedSeqs;
00356         m_msa = PSIMsaNew(&m_dim);
00357         m_query = new unsigned char[kQueryLength];
00358 
00359         // Initialize sequence 1 with the query (self-hit)
00360         for (unsigned int i = 0; i < kQueryLength; i++) {
00361             for (unsigned int seq_idx = 0; seq_idx < kNumAlignedSeqs + 1;
00362                  seq_idx++) {
00363                 m_msa->data[seq_idx][i].letter = m_query[i] = kQuery[i];
00364                 m_msa->data[seq_idx][i].is_aligned = true;
00365             }
00366         }
00367     }
00368 
00369     Uint1 FindNonIdenticalHighScoringResidue
00370         (Uint1 res, const SNCBIPackedScoreMatrix* score_matrix)
00371     {
00372         BOOST_REQUIRE(score_matrix);
00373         Uint1 retval = AMINOACID_TO_NCBISTDAA[(int)'-'];
00374         int max_score = BLAST_SCORE_MIN;
00375 
00376         for (size_t i = 0; i < BLASTAA_SIZE; i++) {
00377             // alignment with itself is not allowed :)
00378             if (i == res) {
00379                 continue;
00380             }
00381             int score = 
00382                 static_cast<int>(NCBISM_GetScore(score_matrix, res, i));
00383             if (score > max_score) {
00384                 max_score = score;
00385                 retval = i;
00386             }
00387         }
00388         BOOST_REQUIRE(retval != AMINOACID_TO_NCBISTDAA[(int)'-']);
00389         return retval;
00390     }
00391 
00392     void SetupMsaHasUnalignedRegion(void) {
00393         const Uint4 kNumAlignedSeqs = 2;    // does not include query
00394 
00395         m_dim.query_length = kQueryLength;
00396         m_dim.num_seqs = kNumAlignedSeqs;
00397         m_msa = PSIMsaNew(&m_dim);
00398         m_query = new unsigned char[kQueryLength];
00399 
00400         // Initialize query sequence
00401         for (unsigned int i = 0; i < kQueryLength; i++) {
00402             m_msa->data[0][i].letter = m_query[i] = kQuery[i];
00403             m_msa->data[0][i].is_aligned = true;
00404         }
00405 
00406         const SNCBIPackedScoreMatrix* score_matrix = &NCBISM_Blosum62;
00407 
00408         // Initialize sequence 1 with the highest scoring residues that can be
00409         // aligned with the query for the first 100 residues
00410         // This is done so that the aligned sequences are not purged in the
00411         // first stage of PSSM creation
00412         const TAlignedSegment kFirstAlignment(0, 100);
00413         for (unsigned int i = kFirstAlignment.first; 
00414              i < kFirstAlignment.second; i++) {
00415             m_msa->data[1][i].letter = 
00416                 FindNonIdenticalHighScoringResidue(kQuery[i], score_matrix);
00417             m_msa->data[1][i].is_aligned = true;
00418         }
00419 
00420         // Initialize sequence 2 with the highest scoring residues that can be
00421         // aligned with the query for residue positions 200-kQueryLength
00422         // This is done so that the aligned sequences are not purged in the
00423         // first stage of PSSM creation
00424         const TAlignedSegment kSecondAlignment(200, kQueryLength);
00425         for (unsigned int i = kSecondAlignment.first; 
00426              i < kSecondAlignment.second; i++) {
00427             m_msa->data[2][i].letter = 
00428                 FindNonIdenticalHighScoringResidue(kQuery[i], score_matrix);
00429             m_msa->data[2][i].is_aligned = true;
00430         }
00431     }
00432 
00433     void SetupQueryAlignedWithInternalGaps() {
00434         using std::pair;
00435         using std::string;
00436         using std::vector;
00437 
00438         const Uint4 kNumAlignedSeqs = 1;
00439         const size_t kLocalQueryLength = 87;
00440 
00441         m_dim.query_length = kLocalQueryLength;
00442         m_dim.num_seqs = kNumAlignedSeqs;
00443         m_msa = PSIMsaNew(&m_dim);
00444         m_query = new unsigned char[kLocalQueryLength];
00445 
00446         string query_seq("MFKVYGYDSNIHKCGPCDNAKRLLTVKKQPFEFINIM");
00447         query_seq += string("PEKGVFDDEKIAELLTKLGRDTQIGLTMPQVFAPDGSHIGGFD");
00448         query_seq += string("QLREYFK");
00449 
00450         typedef pair<TAlignedSegment, string> TAlignedSequence;
00451         vector<TAlignedSequence> aligned_sequence;
00452 
00453         TAlignedSequence region(make_pair(make_pair(0U, 8U), 
00454                                           string("KVVVFIKP")));
00455         aligned_sequence.push_back(region);
00456 
00457         region = make_pair(make_pair(12U, 39U),
00458                            string("TCPFCRKTQELLSQLPFLLEFVDITAT"));
00459         aligned_sequence.push_back(region);
00460 
00461         region = make_pair(make_pair(41U, 57U), string("SDTNEIQDYLQQLTGA"));
00462         aligned_sequence.push_back(region);
00463 
00464         region = make_pair(make_pair(62U, 71U), string("RTVPRVFIG"));
00465         aligned_sequence.push_back(region);
00466 
00467         region = make_pair(make_pair(72U, 87U), string("KECIGGCTDLESMHK"));
00468         aligned_sequence.push_back(region);
00469 
00470 
00471         const Uint1 kGapResidue = AMINOACID_TO_NCBISTDAA[(int)'-'];
00472         for (Uint4 i = 0; i < kLocalQueryLength; i++) {
00473             m_query[i] = CSeqportUtil::GetIndex(CSeq_data::e_Ncbistdaa,
00474                                                 query_seq.substr(i, 1));
00475             m_msa->data[0][i].letter = m_query[i];
00476             m_msa->data[0][i].is_aligned = true;
00477 
00478             // align the second sequence to gaps
00479             m_msa->data[1][i].letter = kGapResidue;
00480             m_msa->data[1][i].is_aligned = true;
00481         }
00482 
00483         // Now overwrite the gaps with the aligned sequences
00484         ITERATE(vector<TAlignedSequence>, itr, aligned_sequence) {
00485             TAlignedSegment loc = itr->first;  // location in the sequence
00486             string sequence_data = itr->second;
00487 
00488             for (Uint4 i = loc.first, j = 0; i < loc.second; i++, j++) {
00489                 m_msa->data[1][i].letter = 
00490                     CSeqportUtil::GetIndex(CSeq_data::e_Ncbistdaa,
00491                                            sequence_data.substr(j, 1));
00492             }
00493         }
00494    }
00495 
00496    void SetupHenikoffsPositionBasedSequenceWeights(void) {
00497         const Uint4 kNumAlignedSeqs = 3;    // does not include query
00498         const Uint1 kQuerySequence[5] = { 7, 22, 19, 7, 17 };
00499         const Uint1 kSeq1[5] =  { 7,  6,  4, 7,  6 };
00500         const Uint1 kSeq2[5] =  { 7, 22,  4, 7,  6 };
00501         const Uint1 kSeq3[5] =  { 7, 22, 15, 7,  7 };
00502 
00503         m_dim.query_length = sizeof(kQuery);
00504         m_dim.num_seqs = kNumAlignedSeqs;
00505         m_msa = PSIMsaNew(&m_dim);
00506         m_query = new unsigned char[sizeof(kQuerySequence)];
00507 
00508         // Initialize aligned sequences
00509         for (Uint4 s = 0; s < kNumAlignedSeqs; s++) {
00510 
00511             const Uint1* sequence = NULL;
00512             switch (s) {
00513             case 0: sequence = kSeq1; break;
00514             case 1: sequence = kSeq2; break;
00515             case 2: sequence = kSeq3; break;
00516             default: abort();    // should never happen
00517             }
00518 
00519             for (Uint4 i = 0; i < sizeof(kQuerySequence); i++) {
00520                 m_query[i] = kQuerySequence[i];
00521                 m_msa->data[s][i].letter = sequence[i];
00522                 m_msa->data[s][i].is_aligned = true;
00523             }
00524         }
00525    }
00526 
00527    void SetupDuplicateHit(void) {
00528         const Uint4 kNumAlignedSeqs = 2;    // does not include query
00529 
00530         // This sequence is used as aligned sequence #1 and #2, i.e. it is a
00531         // duplicate hit
00532         const Uint1 kGi_129296_[388] = {  
00533         12,  4, 17,  9, 17, 19, 18, 13,  1, 10,  6,  3,  6,  4, 19, 
00534          6, 13,  5, 12, 10, 19,  8,  8, 19, 13,  5, 13,  9, 11, 22, 
00535          3, 14, 11, 17,  9, 11, 18,  1, 11,  1, 12, 19, 22, 11,  7, 
00536          1, 16,  7, 13, 18,  5, 17, 15, 12, 10, 10, 19, 11,  8,  6, 
00537          4, 17,  9, 18,  7,  1,  7, 17, 18, 18,  4, 17, 15,  3,  7, 
00538         17, 17,  5, 22, 19,  8, 13, 11,  6, 10,  5, 11, 11, 17,  5, 
00539          9, 18, 16, 14, 13,  1, 18, 22, 17, 11,  5,  9,  1,  4, 10, 
00540         11, 22, 19,  4, 10, 18,  6, 17, 19, 11, 14,  5, 22, 11, 17, 
00541          3,  1, 16, 10,  6, 22, 18,  7,  7, 19,  5,  5, 19, 13,  6, 
00542         10, 18,  1,  1,  5,  5,  1, 16, 15, 11,  9, 13, 17, 20, 19, 
00543          5, 10,  5, 18, 13,  7, 15,  9, 10,  4, 11, 11, 19, 17, 17, 
00544         17,  9,  4,  6,  7, 18, 18, 12, 19,  6,  9, 13, 18,  9, 22, 
00545          6, 10,  7,  9, 20, 10,  9,  1,  6, 13, 18,  5,  4, 18, 16, 
00546          5, 12, 14,  6, 17, 12, 18, 10,  5,  5, 17, 10, 14, 19, 15, 
00547         12, 12,  3, 12, 13, 13, 17,  6, 13, 19,  1, 18, 11, 14,  1, 
00548          5, 10, 12, 10,  9, 11,  5, 11, 14, 22,  1, 17,  7,  4, 11, 
00549         17, 12, 11, 19, 11, 11, 14,  4,  5, 19, 17,  7, 11,  5, 16, 
00550          9,  5, 10, 18,  9, 13,  6,  4, 10, 11, 16,  5, 20, 18, 17, 
00551         18, 13,  1, 12,  1, 10, 10, 17, 12, 10, 19, 22, 11, 14, 16, 
00552         12, 10,  9,  5,  5, 10, 22, 13, 11, 18, 17,  9, 11, 12,  1, 
00553         11,  7, 12, 18,  4, 11,  6, 17, 16, 17,  1, 13, 11, 18,  7, 
00554          9, 17, 17, 19,  4, 13, 11, 12,  9, 17,  4,  1, 19,  8,  7, 
00555         19,  6, 12,  5, 19, 13,  5,  5,  7, 18,  5,  1, 18,  7, 17, 
00556         18,  7,  1,  9,  7, 13,  9, 10,  8, 17, 11,  5, 11,  5,  5, 
00557          6, 16,  1,  4,  8, 14,  6, 11,  6,  6,  9, 16, 22, 13, 14, 
00558         18, 13,  1,  9, 11,  6,  6,  7, 16, 22, 20, 17, 14};
00559 
00560         m_dim.query_length = kQueryLength;
00561         m_dim.num_seqs = kNumAlignedSeqs;
00562         m_msa = PSIMsaNew(&m_dim);
00563         m_query = new unsigned char[kQueryLength];
00564 
00565         for (unsigned int i = 0; i < kQueryLength; i++) {
00566             m_msa->data[kQueryIndex][i].letter = m_query[i] = kQuery[i];
00567             m_msa->data[kQueryIndex][i].is_aligned = true;
00568         }
00569 
00570         for (unsigned int i = 1; i < kNumAlignedSeqs + 1; i++) {
00571             for (unsigned int j = 0; j < kQueryLength; j++) {
00572                 m_msa->data[i][j].letter = kGi_129296_[j];
00573                 m_msa->data[i][j].is_aligned = true;
00574             }
00575         }
00576    }
00577 
00578    void SetupNearIdenticalHits(void) {
00579         SetupDuplicateHit();
00580 
00581         const Uint4 kHitIndex = 2;  // index of the near identical hit
00582         const Uint4 kNumIdenticalResidues = (Uint4) (GetQueryLength() *
00583             (kPSINearIdentical + 0.01));
00584 
00585         for (Uint4 i = kNumIdenticalResidues; i < GetQueryLength(); i++) {
00586             Uint1& residue = m_msa->data[kHitIndex][i].letter;
00587             residue = (residue + 1) % BLASTAA_SIZE;
00588             BOOST_REQUIRE(residue > 0 && residue < BLASTAA_SIZE);
00589         }
00590     }
00591 };
00592 
00593 const size_t CPssmInputTestData::kQueryLength;
00594 const Uint1 CPssmInputTestData::kQuery[CPssmInputTestData::kQueryLength] = {
00595     15,  9, 10,  4, 11, 11, 19, 17, 17, 17, 18,  4, 11,  4, 18, 
00596     18, 11, 19, 11, 19, 13,  1,  9, 22,  6, 10,  7, 12, 20, 10, 
00597     18,  1,  6, 13,  1,  5,  4, 18, 16,  5, 12, 14,  6,  8, 19, 
00598     18, 10, 15,  5, 17, 10, 14, 19, 15, 12, 12,  3, 12, 13, 13, 
00599     17,  6, 13, 19,  1, 18, 11, 14,  1,  5, 10, 12, 10,  9, 11, 
00600      5, 11, 14,  6,  1, 17,  7,  4, 11, 17, 12, 11, 19, 11, 11, 
00601     14,  4,  5, 19, 17,  4, 11,  5, 16,  9,  5, 10, 18,  9, 13, 
00602      6,  5, 10, 11, 18,  5, 20, 18, 13, 14, 13, 18, 12,  5, 10, 
00603     16, 16, 19, 10, 19, 22, 11, 14, 15, 12, 10,  9,  5,  5, 10, 
00604     22, 13, 11, 18, 17, 19, 11, 12,  1, 11,  7, 12, 18,  4, 11, 
00605      6,  9, 14, 17,  1, 13, 11, 18,  7,  9, 17, 17,  1,  5, 17, 
00606     11, 10,  9, 17, 15,  1, 19,  8,  7,  1,  6, 12,  5, 11, 17, 
00607      5,  4,  7,  9,  5, 12,  1,  7, 17, 18,  7, 19,  9,  5,  4, 
00608      9, 10,  8, 17, 14,  5, 17,  5, 15,  6, 16,  1,  4,  8, 14, 
00609      6, 11,  6, 11,  9, 10,  8, 13, 14, 18, 13, 18,  9, 19, 22, 
00610      6,  7, 16, 22, 20, 17, 14};
00611 
00612 /// template specializations to automate deallocation of internal BLAST
00613 /// structures with ncbi::AutoPtr
00614 BEGIN_NCBI_SCOPE
00615 
00616 // FIXME: declare RAII classes for these?
00617 template <>
00618 struct Deleter<_PSIAlignedBlock> {
00619     static void Delete(_PSIAlignedBlock* p)
00620     { _PSIAlignedBlockFree(p); }
00621 };
00622 
00623 template <>
00624 struct Deleter<_PSISequenceWeights> {
00625     static void Delete(_PSISequenceWeights* p)
00626     { _PSISequenceWeightsFree(p); }
00627 };
00628 
00629 template <>
00630 struct Deleter<_PSIInternalPssmData> {
00631     static void Delete(_PSIInternalPssmData* p)
00632     { _PSIInternalPssmDataFree(p); }
00633 };
00634 
00635 template <>
00636 struct Deleter<_PSIMsa> {
00637     static void Delete(_PSIMsa* p)
00638     { _PSIMsaFree(p); }
00639 };
00640 
00641 template <>
00642 struct Deleter<_PSIPackedMsa> {
00643     static void Delete(_PSIPackedMsa* p)
00644     { _PSIPackedMsaFree(p); }
00645 };
00646 
00647 END_NCBI_SCOPE
00648 
00649 BOOST_FIXTURE_TEST_SUITE(pssmcreate, CPssmCreateTestFixture)
00650 
00651 
00652 /// @param query protein sequence in ncbistdaa with sentinel bytes
00653 /// @param query_size length of the query sequence (w/o including sentinel
00654 //  bytes)
00655 static BlastScoreBlk* InitializeBlastScoreBlk(const unsigned char* query,
00656                                                   Uint4 query_size) {
00657         const EBlastProgramType kProgramType = eBlastTypeBlastp;
00658         const double kScaleFactor = 1.0;
00659         Blast_Message* errors = NULL;
00660         short status = 0;
00661 
00662         // Setup the scoring options
00663         CBlastScoringOptions opts;
00664         status = BlastScoringOptionsNew(kProgramType, &opts);
00665         BOOST_REQUIRE(status == 0);
00666 
00667         // Setup the sequence block structure
00668         CBLAST_SequenceBlk query_blk;
00669         status = BlastSeqBlkNew(&query_blk);
00670         BOOST_REQUIRE(status == 0);
00671         status = BlastSeqBlkSetSequence(query_blk, query, query_size);
00672         BOOST_REQUIRE(status == 0);
00673         // don't delete the sequences upon exit!
00674         query_blk->sequence_allocated = FALSE;
00675         query_blk->sequence_start_allocated = FALSE;
00676 
00677         const Uint1 kNullByte = GetSentinelByte(eBlastEncodingProtein);
00678         BOOST_REQUIRE(query_blk.Get() != NULL);
00679         BOOST_REQUIRE(query_blk->sequence[0] != kNullByte);
00680         BOOST_REQUIRE(query_blk->sequence[query_blk->length - 1] != kNullByte);
00681         BOOST_REQUIRE(query_blk->sequence_start[0] == kNullByte);
00682         BOOST_REQUIRE(query_blk->sequence_start[query_blk->length + 1] ==
00683                        kNullByte);
00684 
00685         // Setup the query info structure
00686         CBlastQueryInfo query_info(TestUtil::CreateProtQueryInfo(query_size));
00687 
00688         BlastScoreBlk* retval = NULL;
00689         status = BlastSetup_ScoreBlkInit(query_blk,
00690                                           query_info,
00691                                           opts,
00692                                           kProgramType,
00693                                           &retval,
00694                                           kScaleFactor,
00695                                           &errors,
00696                                           &BlastFindMatrixPath);
00697         if (status) {
00698             throw runtime_error(errors->message);
00699         }
00700         BOOST_REQUIRE(retval->kbp_ideal);
00701 
00702         /*********************************************************************/
00703 
00704         return retval;
00705 }
00706 
00707 
00708 BOOST_AUTO_TEST_CASE(testFullPssmEngineRunWithDiagnosticsRequest) {
00709 
00710         const string seqalign("data/nr-129295.new.asn.short");
00711         auto_ptr<CObjectIStream> in
00712             (CObjectIStream::Open(seqalign, eSerial_AsnText));
00713 
00714         CRef<CSeq_align_set> sas(new CSeq_align_set());
00715         *in >> *sas;
00716 
00717         CSeq_id qid("gi|129295"), sid("gi|6");
00718         auto_ptr<SSeqLoc> q(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00719         SBlastSequence seq(GetSequence(*q->seqloc, eBlastEncodingProtein, q->scope));
00720 
00721         CPSIBlastOptions opts;
00722         PSIBlastOptionsNew(&opts);
00723 
00724         PSIDiagnosticsRequest request;
00725         memset((void*) &request, 0, sizeof(request));
00726         request.information_content = false;        // unsupported
00727         request.residue_frequencies = true;
00728         request.weighted_residue_frequencies = true;
00729         request.frequency_ratios = true;
00730         request.gapless_column_weights = false; // unsupported
00731 
00732         CRef<IPssmInputData> pssm_strategy(
00733             new CPsiBlastInputData(seq.data.get()+1,
00734                                    seq.length-2, // don't count sentinels
00735                                    sas, q->scope, 
00736                                    *opts, 
00737                                    "BLOSUM80",
00738                                    &request));
00739         CRef<CPssmEngine> pssm_engine(new CPssmEngine(pssm_strategy));
00740         CRef<CPssmWithParameters> pssm = pssm_engine->Run();
00741 
00742         const size_t kNumElements = 
00743             pssm_strategy->GetQueryLength() * BLASTAA_SIZE;
00744         // Verify the residue frequencies came back
00745         const CPssmIntermediateData::TResFreqsPerPos& res_freqs =
00746             pssm->GetPssm().GetIntermediateData().GetResFreqsPerPos();
00747         BOOST_REQUIRE_EQUAL(kNumElements, res_freqs.size());
00748 
00749         const CPssmIntermediateData::TWeightedResFreqsPerPos& wres_freqs =
00750             pssm->GetPssm().GetIntermediateData().GetWeightedResFreqsPerPos();
00751         BOOST_REQUIRE_EQUAL(kNumElements, wres_freqs.size());
00752 
00753         const CPssmIntermediateData::TFreqRatios& freq_ratios = 
00754             pssm->GetPssm().GetIntermediateData().GetFreqRatios();
00755         BOOST_REQUIRE_EQUAL(kNumElements, freq_ratios.size());
00756 
00757         //TestUtil::PrintTextAsn1Object("pssm-diags.asn", &*pssm);
00758 
00759         // Test the unsupported diagnostics
00760         pssm_strategy.Reset();
00761         pssm_engine.Reset();
00762         memset((void*) &request, 0, sizeof(request));
00763         request.information_content = true;
00764 
00765         pssm_strategy.Reset(
00766                 new CPsiBlastInputData(seq.data.get(),
00767                                        seq.length,
00768                                        sas, q->scope, 
00769                                        *opts, 
00770                                        "BLOSUM80",
00771                                        &request));
00772         pssm_engine.Reset(new CPssmEngine(pssm_strategy));
00773         BOOST_CHECK_THROW(pssm_engine->Run(), CBlastException);
00774 
00775         pssm_strategy.Reset();
00776         pssm_engine.Reset();
00777 
00778         // Test the unsupported diagnostics
00779         memset((void*) &request, 0, sizeof(request));
00780         request.gapless_column_weights = true;
00781 
00782         pssm_strategy.Reset(
00783                 new CPsiBlastInputData(seq.data.get(),
00784                                        seq.length,
00785                                        sas, q->scope, 
00786                                        *opts, 
00787                                        "BLOSUM80",
00788                                        &request));
00789         pssm_engine.Reset(new CPssmEngine(pssm_strategy));
00790         BOOST_CHECK_THROW(pssm_engine->Run(), CBlastException);
00791 }
00792 
00793 // test sequence alignment convertion to multiple sequence alignment
00794 // structure
00795 BOOST_AUTO_TEST_CASE(testSeqAlignToPsiBlastMultipleSequenceAlignment) {
00796         
00797         /*** Setup code ***/
00798         CSeq_id qid("gi|129295"), sid("gi|6");
00799         auto_ptr<SSeqLoc> q(CTestObjMgr::Instance().CreateSSeqLoc(qid));
00800         auto_ptr<SSeqLoc> s(CTestObjMgr::Instance().CreateSSeqLoc(sid));
00801         CBl2Seq blaster(*q, *s, eBlastp);
00802         TSeqAlignVector sasv = blaster.Run();
00803         BOOST_REQUIRE(sasv.size() != 0);
00804 
00805         CPSIBlastOptions opts;
00806         PSIBlastOptionsNew(&opts);
00807 
00808         opts->inclusion_ethresh = BLAST_EXPECT_VALUE;
00809         opts->use_best_alignment = FALSE;
00810 
00811         // Retrieve the query sequence, but skip the sentinel bytes!
00812         SBlastSequence seq(GetSequence(*q->seqloc, eBlastEncodingProtein, q->scope));
00813 
00814         try {
00815             auto_ptr<CPsiBlastInputData> pssm_input(
00816                 new CPsiBlastInputData(seq.data.get()+1,
00817                                        seq.length-2,
00818                                        sasv[0], q->scope, *opts));
00819             // Create the score matrix builder!
00820             CPssmEngine pssm_engine(pssm_input.get());
00821             pssm_input->Process();
00822             // include query
00823             TSeqPos nseqs = CPssmCreateTestFixture::GetNumAlignedSequences(*pssm_input) + 1; 
00824 
00825         /*** End Setup code ***/
00826 
00827             // Actual unit tests follow:
00828             // Walk through the alignment segments and ensure m_AlignmentData
00829             // is filled properly
00830 
00831             TSeqPos seq_index = 1; // skip the query sequence
00832                 const PSIMsaCell kNullPSIMsaCell = { 
00833                     (unsigned char) 0,              // letter
00834                     false                           // is_aligned
00835                 };
00836 
00837                 // vector to keep track of aligned positions of a particular
00838                 // subject w.r.t the query/query sequence
00839                 vector<PSIMsaCell> aligned_pos(pssm_input->GetQueryLength());
00840                 fill(aligned_pos.begin(), aligned_pos.end(), kNullPSIMsaCell);
00841 
00842                 // Iterate over all HSPs and populate the aligned_pos vector.
00843                 // This should be identical to what the pssm_engine object 
00844                 // calculated.
00845                 ITERATE(CSeq_align_set::Tdata, hsp, sasv[0]->Get()) {
00846                     const CDense_seg& ds = (*hsp)->GetSegs().GetDenseg();
00847                     string subj;
00848                     CPssmCreateTestFixture::x_GetSubjectSequence(ds, 
00849                                                              *s->scope, subj);
00850                     const vector<TSignedSeqPos>& starts = ds.GetStarts();
00851                     const vector<TSeqPos>& lengths = ds.GetLens();
00852 
00853                     for (int i = 0; i < ds.GetNumseg(); i++) {
00854                         TSignedSeqPos q_index = starts[i*ds.GetDim()];
00855                         TSignedSeqPos s_index = starts[i*ds.GetDim()+1];
00856 // FIXME
00857 #define GAP_IN_ALIGNMENT -1
00858                         if (s_index == (int)GAP_IN_ALIGNMENT) {
00859                             for (TSeqPos pos = 0; pos < lengths[i]; pos++) {
00860                                 PSIMsaCell& pd = aligned_pos[q_index++];
00861                                 pd.letter = AMINOACID_TO_NCBISTDAA[(Uint1)'-'];
00862                                 pd.is_aligned = true;
00863                             }
00864                         } else if (q_index == (int)GAP_IN_ALIGNMENT) {
00865                             s_index += lengths[i];
00866                             continue;
00867                         } else {
00868                             s_index = (i == 0) ? 0 : (s_index - starts[1]);
00869                             for (TSeqPos pos = 0; pos < lengths[i]; pos++) {
00870                                 PSIMsaCell& pd = aligned_pos[q_index++];
00871                                 pd.letter = subj[s_index++];
00872                                 pd.is_aligned = true;
00873                             }
00874                         }
00875                     }
00876                 }
00877 
00878                 stringstream ss;
00879                 // Now compare each position for this sequence
00880                 for (TSeqPos i = 0; i < pssm_input->GetQueryLength(); i++) {
00881                     BOOST_REQUIRE(seq_index < nseqs);
00882                     const PSIMsaCell& pos_desc = 
00883                         pssm_input->GetData()->data[seq_index][i];
00884                     ss.str("");
00885                     ss << "Sequence " << seq_index << ", position " << i 
00886                        << " differ";
00887                     BOOST_REQUIRE_MESSAGE(aligned_pos[i].letter == pos_desc.letter && 
00888                          aligned_pos[i].is_aligned == pos_desc.is_aligned, ss.str());
00889                 }
00890 
00891                 seq_index++;
00892         } catch (const exception& e) {  
00893             cerr << e.what() << endl; 
00894             BOOST_REQUIRE(false);
00895         } catch (...) {  
00896             cerr << "Unknown exception" << endl; 
00897             BOOST_REQUIRE(false);
00898         }
00899 }
00900 
00901 /// Unit test the individual stages of the PSSM creation algorithm (core
00902 /// layer):
00903 /// 1. purged biased sequences
00904 BOOST_AUTO_TEST_CASE(testPurgeSequencesWithNull) {
00905         int rv = _PSIPurgeBiasedSegments(NULL);
00906         BOOST_REQUIRE_EQUAL(PSIERR_BADPARAM, rv);
00907 }
00908 
00909 BOOST_AUTO_TEST_CASE(testPurgeSelfHit) {
00910         auto_ptr<IPssmInputData> pssm_input
00911             (new CPssmInputTestData(CPssmInputTestData::eSelfHit));
00912         pssm_input->Process();  // standard calling convention
00913         AutoPtr<_PSIPackedMsa> msa(_PSIPackedMsaNew(pssm_input->GetData()));
00914         int rv = _PSIPurgeBiasedSegments(msa.get());
00915         BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);    
00916         const Uint4 kSelfHitIndex = 1;
00917         BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex]);
00918         BOOST_REQUIRE_EQUAL(false, (bool) msa->use_sequence[kSelfHitIndex]);
00919 }
00920 
00921 BOOST_AUTO_TEST_CASE(testPurgeDuplicateHit) {
00922         auto_ptr<IPssmInputData> pssm_input
00923             (new CPssmInputTestData(CPssmInputTestData::eDuplicateHit));
00924         pssm_input->Process();  // standard calling convention
00925         AutoPtr<_PSIPackedMsa> msa(_PSIPackedMsaNew(pssm_input->GetData()));
00926         int rv = _PSIPurgeBiasedSegments(msa.get());
00927         BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);    
00928         const Uint4 kDuplicateHitIndex = 2;
00929         BOOST_REQUIRE_EQUAL(false, 
00930                              (bool) msa->use_sequence[kDuplicateHitIndex]);
00931         BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex]);
00932         BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex + 1]);
00933 }
00934 
00935 BOOST_AUTO_TEST_CASE(testPurgeNearIdenticalHits) {
00936         auto_ptr<IPssmInputData> pssm_input
00937             (new CPssmInputTestData(CPssmInputTestData::eNearIdenticalHits));
00938         pssm_input->Process();  // standard calling convention
00939         AutoPtr<_PSIPackedMsa> msa(_PSIPackedMsaNew(pssm_input->GetData()));
00940         int rv = _PSIPurgeBiasedSegments(msa.get());
00941         BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);    
00942         const Uint4 kRemovedHitIndex = 2;
00943         BOOST_REQUIRE_EQUAL(false, 
00944                              (bool) msa->use_sequence[kRemovedHitIndex]);
00945         BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex]);
00946         BOOST_REQUIRE_EQUAL(true, (bool) msa->use_sequence[kQueryIndex + 1]);
00947 }
00948 
00949 BOOST_AUTO_TEST_CASE(testQueryAlignedWithInternalGaps) {
00950         auto_ptr<IPssmInputData> pssm_input
00951             (new CPssmInputTestData
00952              (CPssmInputTestData::eQueryAlignedWithInternalGaps));
00953         BOOST_REQUIRE_EQUAL(string("BLOSUM62"),
00954                              string(pssm_input->GetMatrixName()));
00955         CPssmEngine pssm_engine(pssm_input.get());
00956         CRef<CPssmWithParameters> pssm_asn = pssm_engine.Run();
00957 
00958         auto_ptr< CNcbiMatrix<int> > pssm
00959             (CScorematPssmConverter::GetScores(*pssm_asn));
00960 
00961         /* Make sure that the resulting PSSM's scores are based on the scores
00962          * of the underlying scoring matrix and the query sequence (i.e.: the
00963          * PSSM scores should be within one or two values from those in the
00964          * underlying scoring matrix) */
00965         
00966         const SNCBIPackedScoreMatrix* score_matrix = &NCBISM_Blosum62;
00967         const Uint1 kGapResidue = AMINOACID_TO_NCBISTDAA[(int)'-'];
00968         stringstream ss;
00969         BOOST_REQUIRE_EQUAL((size_t)pssm_asn->GetPssm().GetNumColumns(),
00970                              (size_t)pssm->GetCols());
00971         BOOST_REQUIRE_EQUAL((size_t)pssm_asn->GetPssm().GetNumRows(),
00972                              (size_t)pssm->GetRows());
00973         for (int i = 0; i < pssm_asn->GetPssm().GetNumColumns(); i++) {
00974             for (int j = 0; j < pssm_asn->GetPssm().GetNumRows(); j++) {
00975 
00976                 // Exceptional residues get value of BLAST_SCORE_MIN
00977                 if (j == kGapResidue) {
00978                     ss.str("");
00979                     ss << "Position " << i << " residue " 
00980                        << TestUtil::GetResidue(j) << " differ on PSSM";
00981                     BOOST_REQUIRE_MESSAGE(BLAST_SCORE_MIN == (*pssm)(j, i), ss.str());
00982                 } else {
00983                     int score = 
00984                         (int)NCBISM_GetScore(score_matrix,
00985                                              pssm_input->GetQuery()[i], j);
00986 
00987                     ss.str("");
00988                     ss << "Position " << i << " residue " 
00989                        << TestUtil::GetResidue(j) << " differ on PSSM: "
00990                        << "expected=" << NStr::IntToString(score) 
00991                        << " actual=" << NStr::IntToString((*pssm)(j, i));
00992                     BOOST_REQUIRE_MESSAGE (score-1 <= (*pssm)(j, i) || (*pssm)(j, i) <= score+1, ss.str());
00993                 }
00994             }
00995         }
00996 }
00997     
00998 BOOST_AUTO_TEST_CASE(testMultiSeqAlignmentHasRegionsUnalignedToQuery) {
00999         auto_ptr<IPssmInputData> pssm_input
01000             (new
01001              CPssmInputTestData(CPssmInputTestData::eMsaHasUnalignedRegion));
01002         pssm_input->Process();  // standard calling convention
01003         BOOST_REQUIRE_EQUAL(string("BLOSUM62"),
01004                              string(pssm_input->GetMatrixName()));
01005 
01006 
01007         /*** Run the stage to purge biased alignment segments */
01008         AutoPtr<_PSIPackedMsa> packed_msa
01009             (_PSIPackedMsaNew(pssm_input->GetData()));
01010         int rv = _PSIPurgeBiasedSegments(packed_msa.get());
01011         BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);    
01012         BOOST_REQUIRE_EQUAL(true, 
01013                              (bool) packed_msa->use_sequence[kQueryIndex]);
01014         BOOST_REQUIRE_EQUAL(true, (bool) packed_msa->use_sequence[1]);
01015         BOOST_REQUIRE_EQUAL(true, (bool) packed_msa->use_sequence[2]);
01016 
01017         AutoPtr<_PSIMsa> msa(_PSIMsaNew(packed_msa.get(), BLASTAA_SIZE));
01018         /*** Run the stage to calculate alignment extents */
01019         CPSIBlastOptions opts;
01020         PSIBlastOptionsNew(&opts);
01021         AutoPtr<_PSIAlignedBlock> aligned_blocks(
01022             _PSIAlignedBlockNew(pssm_input->GetQueryLength()));
01023         rv = _PSIComputeAlignmentBlocks(msa.get(), aligned_blocks.get());
01024         stringstream ss;
01025         ss << "_PSIComputeAlignmentBlocks failed: " 
01026            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01027         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01028 
01029         // Verify the alignment extents for aligned regions to the query
01030         vector<CPssmInputTestData::TAlignedSegment> aligned_regions;
01031         aligned_regions.push_back(make_pair(0U, 99U));
01032         aligned_regions.push_back(make_pair(200U,
01033                                             pssm_input->GetQueryLength()-1));
01034 
01035         for (vector<CPssmInputTestData::TAlignedSegment>::const_iterator i =
01036              aligned_regions.begin();
01037              i != aligned_regions.end(); ++i) {
01038             for (TSeqPos pos = i->first; pos < i->second; pos++) {
01039                 ss.str("");
01040                 ss << "Alignment extents differ at position " 
01041                    << NStr::IntToString(pos);
01042                 BOOST_REQUIRE_MESSAGE((int)i->first == (int)aligned_blocks->pos_extnt[pos].left, ss.str());
01043                 BOOST_REQUIRE_MESSAGE((int)i->second == (int)aligned_blocks->pos_extnt[pos].right, ss.str());
01044                 BOOST_REQUIRE_MESSAGE( (int)(i->second - i->first + 1) == (int)aligned_blocks->size[pos], ss.str());
01045             }
01046         }
01047 
01048         // Verify the alignment extents for unaligned regions to the query
01049         const CPssmInputTestData::TAlignedSegment kUnalignedRange(100, 200); 
01050         for (size_t i = kUnalignedRange.first; 
01051              i < kUnalignedRange.second; i++) {
01052             ss.str("");
01053             ss << "Alignment extents differ at position " 
01054                << NStr::IntToString(i);
01055             BOOST_REQUIRE_MESSAGE((int)-1 == (int)aligned_blocks->pos_extnt[i].left, ss.str());
01056             BOOST_REQUIRE_MESSAGE( (int)pssm_input->GetQueryLength() == (int)aligned_blocks->pos_extnt[i].right, ss.str());
01057             BOOST_REQUIRE_MESSAGE(
01058                 (int)(aligned_blocks->pos_extnt[i].right - aligned_blocks->pos_extnt[i].left + 1) == (int)aligned_blocks->size[i],
01059                 ss.str());
01060         }
01061 
01062         /*** Run the stage to compute the sequence weights */
01063         blast::TAutoUint1Ptr query_with_sentinels
01064             (CPssmCreateTestFixture::x_GuardProteinQuery(pssm_input->GetQuery(),
01065                                               pssm_input->GetQueryLength()));;
01066         CBlastScoreBlk sbp;
01067         sbp.Reset
01068             (InitializeBlastScoreBlk
01069                 (query_with_sentinels.get(), pssm_input->GetQueryLength()));
01070         AutoPtr<_PSISequenceWeights> seq_weights(
01071             _PSISequenceWeightsNew(msa->dimensions, 
01072                                    sbp));
01073         rv = _PSIComputeSequenceWeights(msa.get(), aligned_blocks.get(),
01074                                         opts->nsg_compatibility_mode,
01075                                         seq_weights.get());
01076         ss.str("");
01077         ss << "_PSIComputeSequenceWeights failed: "
01078            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01079         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01080 
01081         // Verify the validity of sequence weights corresponding to the aligned
01082         // regions
01083         BOOST_REQUIRE_EQUAL(false, (bool)opts->nsg_compatibility_mode);
01084         const Uint1 kXResidue = AMINOACID_TO_NCBISTDAA[(int)'X'];
01085         for (vector<CPssmInputTestData::TAlignedSegment>::const_iterator i =
01086              aligned_regions.begin();
01087              i != aligned_regions.end(); ++i) {
01088             for (TSeqPos pos = i->first; pos < i->second; pos++) {
01089                 double total_sequence_weights_for_column = 0.0;
01090                 for (size_t res = 0; res < msa->alphabet_size; res++) {
01091                     if (res == kXResidue) continue;
01092                     total_sequence_weights_for_column +=
01093                         seq_weights->match_weights[pos][res];
01094                 }
01095                 BOOST_REQUIRE(total_sequence_weights_for_column > 0.99 &&
01096                                total_sequence_weights_for_column < 1.01);
01097             }
01098         }
01099         // Verify that the unaligned sequence weights are all zero's
01100         for (size_t pos = kUnalignedRange.first; 
01101              pos < kUnalignedRange.second; pos++) {
01102             double total_sequence_weights_for_column = 0.0;
01103             for (size_t res = 0; res < msa->alphabet_size; res++) {
01104                 if (res == kXResidue) continue;
01105                 total_sequence_weights_for_column +=
01106                     seq_weights->match_weights[pos][res];
01107             }
01108             BOOST_REQUIRE(total_sequence_weights_for_column == 0.0);
01109         }
01110 
01111         /*** run the stage to compute the PSSM's frequency ratios ***/
01112         AutoPtr<_PSIInternalPssmData> internal_pssm(
01113             _PSIInternalPssmDataNew(pssm_input->GetQueryLength(), 
01114                                     sbp->alphabet_size));
01115         rv = _PSIComputeFreqRatios(msa.get(), seq_weights.get(), sbp,
01116                                    aligned_blocks.get(), opts->pseudo_count,
01117                                    opts->nsg_compatibility_mode,
01118                                    internal_pssm.get());
01119         ss.str("");
01120         ss << "_PSIComputeResidueFrequencies failed: "
01121            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01122         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01123 
01124         /***** Run the stage to convert residue frequencies to PSSM **********/
01125         rv = _PSIConvertFreqRatiosToPSSM(internal_pssm.get(),
01126                                          msa->query,
01127                                          sbp,
01128                                          seq_weights->std_prob);
01129         ss.str("");
01130         ss << "_PSIConvertResidueFreqsToPSSM failed: "
01131            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01132         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01133 
01134         /**************** Run the stage to scale the PSSM ********************/
01135         rv = _PSIScaleMatrix(msa->query,
01136                              seq_weights->std_prob,
01137                              internal_pssm.get(),
01138                              sbp);
01139         ss.str("");
01140         ss << "_PSIScaleMatrix failed: " 
01141            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01142         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01143 
01144         /* Make sure that the resulting PSSM's scores are based on the scores
01145          * of the underlying scoring matrix and the query sequence (i.e.: the
01146          * PSSM scores should be within one or two values from those in the
01147          * underlying scoring matrix) */
01148         const SNCBIPackedScoreMatrix* score_matrix = &NCBISM_Blosum62;
01149         const Uint1 kGapResidue = AMINOACID_TO_NCBISTDAA[(int)'-'];
01150         for (Uint4 i = 0; i < pssm_input->GetQueryLength(); i++) {
01151             for (Uint4 j = 0; j < (Uint4) sbp->alphabet_size; j++) {
01152 
01153                 // Exceptional residues get value of BLAST_SCORE_MIN
01154                 if (j == kGapResidue) {
01155                     ss.str("");
01156                     ss << "Position " << i << " residue " 
01157                        << TestUtil::GetResidue(j) << " differ on PSSM";
01158                     BOOST_REQUIRE_MESSAGE(BLAST_SCORE_MIN == internal_pssm->pssm[i][j], ss.str());
01159                 } else {
01160                     int score = 
01161                         (int)NCBISM_GetScore(score_matrix, msa->query[i], j);
01162 
01163                     ss.str("");
01164                     ss << "Position " << i << " residue " 
01165                        << TestUtil::GetResidue(j) << " differ on PSSM: "
01166                        << "expected=" << NStr::IntToString(score) 
01167                        << " actual=" <<
01168                        NStr::IntToString(internal_pssm->pssm[i][j]);
01169                     BOOST_REQUIRE_MESSAGE(score-1 <= internal_pssm->pssm[i][j] || internal_pssm->pssm[i][j] <= score+1, ss.str());
01170                 }
01171             }
01172         }
01173 }
01174 
01175 /// test the case when only a segment of the query sequence is the only
01176 /// aligned sequence in the multiple sequence alignment.
01177 /// The scores in the PSSM should be based on the underlying scoring matrix
01178 BOOST_AUTO_TEST_CASE(testQueryIsOnlyAlignedSequenceInMsa) {
01179         auto_ptr<IPssmInputData> pssm_input
01180             (new CPssmInputTestData(CPssmInputTestData::eSelfHit));
01181         pssm_input->Process();  // standard calling convention
01182         BOOST_REQUIRE_EQUAL(string("BLOSUM62"),
01183                              string(pssm_input->GetMatrixName()));
01184 
01185 
01186         /*** Run the stage to purge biased alignment segments */
01187         AutoPtr<_PSIPackedMsa> packed_msa
01188             (_PSIPackedMsaNew(pssm_input->GetData()));
01189         int rv = _PSIPurgeBiasedSegments(packed_msa.get());
01190         BOOST_REQUIRE_EQUAL(PSI_SUCCESS, rv);    
01191         const Uint4 kSelfHitIndex = 1;
01192         BOOST_REQUIRE_EQUAL(true, 
01193                              (bool) packed_msa->use_sequence[kQueryIndex]);
01194         BOOST_REQUIRE_EQUAL(false, 
01195                              (bool) packed_msa->use_sequence[kSelfHitIndex]);
01196 
01197         AutoPtr<_PSIMsa> msa(_PSIMsaNew(packed_msa.get(), BLASTAA_SIZE));
01198         /*** Run the stage to calculate alignment extents */
01199         CPSIBlastOptions opts;
01200         PSIBlastOptionsNew(&opts);
01201         AutoPtr<_PSIAlignedBlock> aligned_blocks(
01202             _PSIAlignedBlockNew(pssm_input->GetQueryLength()));
01203         rv = _PSIComputeAlignmentBlocks(msa.get(), aligned_blocks.get());
01204         stringstream ss;
01205         ss << "_PSIComputeAlignmentBlocks failed: " 
01206            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01207         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01208 
01209         for (size_t i = 0; i < pssm_input->GetQueryLength(); i++) {
01210             BOOST_REQUIRE_EQUAL((int)-1, 
01211                                  (int)aligned_blocks->pos_extnt[i].left);
01212             BOOST_REQUIRE_EQUAL((int)pssm_input->GetQueryLength(),
01213                                  (int)aligned_blocks->pos_extnt[i].right);
01214             BOOST_REQUIRE_EQUAL((int)pssm_input->GetQueryLength() + 2,
01215                                  (int)aligned_blocks->size[i]);
01216         }
01217 
01218         /*** Run the stage to compute the sequence weights */
01219         blast::TAutoUint1Ptr query_with_sentinels
01220             (CPssmCreateTestFixture::x_GuardProteinQuery(pssm_input->GetQuery(),
01221                                               pssm_input->GetQueryLength()));;
01222         CBlastScoreBlk sbp;
01223         sbp.Reset
01224             (InitializeBlastScoreBlk
01225                 (query_with_sentinels.get(), pssm_input->GetQueryLength()));
01226         AutoPtr<_PSISequenceWeights> seq_weights(
01227             _PSISequenceWeightsNew(msa->dimensions, 
01228                                    sbp));
01229         rv = _PSIComputeSequenceWeights(msa.get(), aligned_blocks.get(),
01230                                         // N.B.: we're deliberately ignoring
01231                                         // the sequence weights check!!!!
01232                                         TRUE,
01233                                         seq_weights.get());
01234         ss.str("");
01235         ss << "_PSIComputeSequenceWeights failed: "
01236            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01237         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01238 
01239         /*** run the stage to compute the PSSM's frequency ratios ***/
01240         AutoPtr<_PSIInternalPssmData> internal_pssm(
01241             _PSIInternalPssmDataNew(pssm_input->GetQueryLength(), 
01242                                     sbp->alphabet_size));
01243         rv = _PSIComputeFreqRatios(msa.get(), seq_weights.get(), sbp,
01244                                    aligned_blocks.get(), opts->pseudo_count,
01245                                    opts->nsg_compatibility_mode,
01246                                    internal_pssm.get());
01247         ss.str("");
01248         ss << "_PSIComputeResidueFrequencies failed: "
01249            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01250         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01251 
01252         /***** Run the stage to convert residue frequencies to PSSM **********/
01253         rv = _PSIConvertFreqRatiosToPSSM(internal_pssm.get(),
01254                                          msa->query,
01255                                          sbp,
01256                                          seq_weights->std_prob);
01257         ss.str("");
01258         ss << "_PSIConvertResidueFreqsToPSSM failed: "
01259            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01260         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01261 
01262         /**************** Run the stage to scale the PSSM ********************/
01263         rv = _PSIScaleMatrix(msa->query,
01264                              seq_weights->std_prob,
01265                              internal_pssm.get(),
01266                              sbp);
01267         ss.str("");
01268         ss << "_PSIScaleMatrix failed: " 
01269            << CPssmCreateTestFixture::x_ErrorCodeToString(rv);
01270         BOOST_REQUIRE_MESSAGE(PSI_SUCCESS == rv, ss.str());
01271 
01272         /* Make sure that the resulting PSSM's scores are based on the scores
01273          * of the underlying scoring matrix and the query sequence (i.e.: the
01274          * PSSM scores should be within one or two values from those in the
01275          * underlying scoring matrix) */
01276         const SNCBIPackedScoreMatrix* score_matrix = &NCBISM_Blosum62;
01277         const Uint1 kGapResidue = AMINOACID_TO_NCBISTDAA[(int)'-'];
01278         for (Uint4 i = 0; i < pssm_input->GetQueryLength(); i++) {
01279             for (Uint4 j = 0; j < (Uint4) sbp->alphabet_size; j++) {
01280 
01281                 // Exceptional residues get value of BLAST_SCORE_MIN
01282                 if (j == kGapResidue) {
01283                     ss.str("");
01284                     ss << "Position " << i << " residue " 
01285                        << TestUtil::GetResidue(j) << " differ on PSSM";
01286                     BOOST_REQUIRE_MESSAGE(BLAST_SCORE_MIN == internal_pssm->pssm[i][j], ss.str());
01287                 } else {
01288                     int score = 
01289                         (int)NCBISM_GetScore(score_matrix, msa->query[i], j);
01290 
01291                     ss.str("");
01292                     ss << "Position " << i << " residue " 
01293                        << TestUtil::GetResidue(j) << " differ on PSSM: "
01294                        << "expected=" << NStr::IntToString(score) 
01295                        << " actual=" <<
01296                        NStr::IntToString(internal_pssm->pssm[i][j]);
01297                     BOOST_REQUIRE_MESSAGE(score-1 <= internal_pssm->pssm[i][j] || internal_pssm->pssm[i][j] <= score+1, ss.str());
01298                 }
01299             }
01300         }
01301 }
01302 
01303 BOOST_AUTO_TEST_CASE(testRejectFlankingGaps) {
01304         auto_ptr<IPssmInputData> bad_pssm_data(new CPssmInputFlankingGaps());
01305         CPssmEngine pssm_engine(bad_pssm_data.get());
01306         BOOST_REQUIRE_THROW(pssm_engine.Run(), CBlastException);
01307 }
01308 
01309 BOOST_AUTO_TEST_CASE(testRejectGapInQuery) {
01310         auto_ptr<IPssmInputData> bad_pssm_data(new CPssmInputGapsInQuery());
01311         CPssmEngine pssm_engine(bad_pssm_data.get());
01312         BOOST_REQUIRE_THROW(pssm_engine.Run(), CBlastException);
01313 }
01314 
01315 BOOST_AUTO_TEST_CASE(testRejectQueryLength0) {
01316         auto_ptr<IPssmInputData> bad_pssm_data(new CPssmInputQueryLength0());
01317         BOOST_REQUIRE_THROW(CPssmEngine pssm_engine(bad_pssm_data.get()), CPssmEngineException);
01318 }
01319 
01320 BOOST_AUTO_TEST_CASE(testRejectNullPssmInputData) {
01321         IPssmInputData* null_ptr = NULL;
01322         BOOST_REQUIRE_THROW(CPssmEngine pssm_engine(null_ptr), CPssmEngineException);
01323 }
01324 
01325 BOOST_AUTO_TEST_CASE(testRejectNullsReturnedByPssmInput) {
01326         auto_ptr<IPssmInputData> bad_pssm_data(new CNullPssmInput());
01327          BOOST_REQUIRE_THROW(CPssmEngine pssm_engine(bad_pssm_data.get()), CBlastException);
01328 }
01329 
01330 BOOST_AUTO_TEST_CASE(testRejectUnsupportedMatrix) {
01331         auto_ptr<IPssmInputData> bad_pssm_data(new
01332                                                CPssmInputUnsupportedMatrix());
01333         BOOST_REQUIRE_THROW(CPssmEngine pssm_engine(bad_pssm_data.get()), CBlastException);
01334 }
01335 
01336 // Deliberately ask for an alignment data structure that is too large, to test
01337 // the error handling. Should not be run under valgrind.
01338 BOOST_AUTO_TEST_CASE(testPsiAlignmentDataCreation_TooMuchMemory) {
01339         const PSIMsaDimensions kDimensions = { ncbi::numeric_limits<int>::max(),
01340                                 ncbi::numeric_limits<int>::max() };
01341         PSIMsa* msa = PSIMsaNew(&kDimensions);
01342         BOOST_REQUIRE(msa == NULL);
01343 }
01344 
01345 
01346 BOOST_AUTO_TEST_SUITE_END()
01347 
01348 /*
01349 * ===========================================================================
01350 *
01351 * $Log: pssmcreate-cppunit.cpp,v $
01352 * Revision 1.86  2008/03/13 19:41:58  camacho
01353 * Bring up to date with current CScorematPssmConverter interface
01354 *
01355 * Revision 1.85  2007/12/07 17:19:17  camacho
01356 * Bring in sync with svn revision 115203
01357 *
01358 * Revision 1.84  2007/04/10 18:24:36  madden
01359 * Remove discontinuous seq-aligns
01360 *
01361 * Revision 1.83  2007/01/23 18:02:19  camacho
01362 * + new parameter to posPurgeMatches
01363 *
01364 * Revision 1.82  2006/11/17 17:58:01  camacho
01365 * Update to use new definition of CPsiBlastInputData::x_GetSubjectSequence
01366 *
01367 * Revision 1.81  2006/11/16 14:06:20  camacho
01368 * Add missing Deleter specialization
01369 *
01370 * Revision 1.80  2006/11/14 15:56:41  camacho
01371 * Bring up to date with most recent PSSM engine optimizations
01372 *
01373 * Revision 1.79  2006/08/31 22:04:52  camacho
01374 * Minor fix
01375 *
01376 * Revision 1.78  2006/07/05 15:24:15  camacho
01377 * Changes to support new value of BLASTAA_SIZE
01378 *
01379 * Revision 1.77  2006/06/05 13:34:05  madden
01380 * Changes to remove [GS]etMatrixPath and use callback instead
01381 *
01382 * Revision 1.76  2006/05/24 17:22:43  madden
01383 * remove call to FindMatrixPath
01384 *
01385 * Revision 1.75  2006/04/26 14:24:47  camacho
01386 * Fix compiler warning
01387 *
01388 * Revision 1.74  2006/02/21 22:10:15  camacho
01389 * Use CNcbiOstrstream and CNcbiOstrstreamToString
01390 *
01391 * Revision 1.73  2006/02/17 18:50:38  camacho
01392 * Replace ostringstream for CNcbiOstrstream for portability issues
01393 *
01394 * Revision 1.72  2006/01/30 17:30:34  camacho
01395 * Relax the maximum permissible difference when comparing doubles
01396 *
01397 * Revision 1.71  2005/11/28 20:46:04  camacho
01398 * Fixes to temporary BLAST object manager class to create CScopes
01399 *
01400 * Revision 1.70  2005/11/10 23:43:31  camacho
01401 * Use TestUtil::CTmpObjMgrBlastDbDataLoader
01402 *
01403 * Revision 1.69  2005/10/26 14:30:46  camacho
01404 * Remove redundant code, reuse private PSI-BLAST auxiliary functions
01405 *
01406 * Revision 1.68  2005/10/14 13:47:32  camacho
01407 * Fixes to pacify icc compiler
01408 *
01409 * Revision 1.67  2005/09/26 16:35:15  camacho
01410 * Use CRef<> to store CPssmEngine
01411 *
01412 * Revision 1.66  2005/09/26 14:41:44  camacho
01413 * Renamed blast_psi.hpp -> pssm_engine.hpp
01414 *
01415 * Revision 1.65  2005/09/23 18:59:11  camacho
01416 * Rollback accidental commit
01417 *
01418 * Revision 1.63  2005/08/26 17:14:06  camacho
01419 * Remove unneeded typedefs
01420 *
01421 * Revision 1.62  2005/08/24 14:46:48  camacho
01422 * Updated tests for PSSM engine
01423 *
01424 * Revision 1.61  2005/06/09 20:37:06  camacho
01425 * Use new private header blast_objmgr_priv.hpp
01426 *
01427 * Revision 1.60  2005/05/20 18:33:20  camacho
01428 * refactorings to use CAsn1PssmConverter
01429 *
01430 * Revision 1.59  2005/05/10 16:09:04  camacho
01431 * Changed *_ENCODING #defines to EBlastEncoding enumeration
01432 *
01433 * Revision 1.58  2005/05/04 13:28:38  camacho
01434 * Fix to previous commit
01435 *
01436 * Revision 1.57  2005/05/03 20:45:07  camacho
01437 * Added test for query aligned with gaps
01438 *
01439 * Revision 1.56  2005/04/29 14:44:53  bealer
01440 * - Fix for inverted test in DOUBLES_EQUAL_MSG (required for release mode).
01441 *
01442 * Revision 1.55  2005/04/27 20:08:40  dondosha
01443 * PHI-blast boolean argument has been removed from BlastSetup_ScoreBlkInit
01444 *
01445 * Revision 1.54  2005/04/22 13:32:13  camacho
01446 * Fix to previous commit
01447 *
01448 * Revision 1.53  2005/04/21 20:45:58  camacho
01449 * Added test for the case when the query sequence is aligned with internal gaps only on a given column
01450 *
01451 * Revision 1.52  2005/03/23 14:27:00  camacho
01452 * Fix compiler warnings
01453 *
01454 * Revision 1.51  2005/03/22 15:47:50  camacho
01455 * added tests for backwards compatibility with old PSSM engine
01456 *
01457 * Revision 1.50  2005/03/21 23:34:44  bealer
01458 * - Doubles/message macro.
01459 *
01460 * Revision 1.49  2005/03/04 17:20:45  bealer
01461 * - Command line option support.
01462 *
01463 * Revision 1.48  2005/03/03 17:45:58  camacho
01464 * fix to loading pssm
01465 *
01466 * Revision 1.47  2005/02/25 19:48:14  camacho
01467 * Added unit test for comparing new vs. old IMPALA scaling
01468 *
01469 * Revision 1.46  2005/02/22 22:51:20  camacho
01470 * + impala_scaling_factor, first cut
01471 *
01472 * Revision 1.45  2005/02/14 14:17:17  camacho
01473 * Changes to use SBlastScoreMatrix
01474 *
01475 * Revision 1.44  2005/02/10 15:43:28  dondosha
01476 * Small memory leak fix
01477 *
01478 * Revision 1.43  2005/01/26 17:52:13  camacho
01479 * Remove unused variables
01480 *
01481 * Revision 1.42  2005/01/22 16:57:01  camacho
01482 * cosmetic change
01483 *
01484 * Revision 1.41  2005/01/10 15:43:52  camacho
01485 * + data/seqp database to database loader
01486 *
01487 * Revision 1.40  2004/12/28 16:48:26  camacho
01488 * 1. Use typedefs to AutoPtr consistently
01489 * 2. Use SBlastSequence structure instead of std::pair as return value to
01490 *    blast::GetSequence
01491 *
01492 * Revision 1.39  2004/12/22 16:26:56  camacho
01493 * Remove diagnostics output
01494 *
01495 * Revision 1.38  2004/12/13 22:37:56  camacho
01496 * Consolidated structure group customizations in option: nsg_compatibility_mode
01497 *
01498 * Revision 1.37  2004/12/09 15:24:10  dondosha
01499 * BlastSetup_GetScoreBlock renamed to BlastSetup_ScoreBlkInit
01500 *
01501 * Revision 1.36  2004/11/30 20:43:38  camacho
01502 * Replace call to GetLoaderNameFromArgs
01503 *
01504 * Revision 1.35  2004/11/29 20:18:03  camacho
01505 * Fix setUp/tearDown methods to avoid creating/deleting the Genbank data loader
01506 * as this spawns many maintenance threads and causes valgrind to fail.
01507 *
01508 * Revision 1.34  2004/11/24 15:16:58  camacho
01509 * + test for default PSIBLAST input data strategy
01510 *
01511 * Revision 1.33  2004/11/23 21:50:08  camacho
01512 * Removed local initialization of ideal Karlin-Altschul parameters
01513 *
01514 * Revision 1.32  2004/11/23 17:53:18  camacho
01515 * Return NULL rather than "" in null matrix test case
01516 *
01517 * Revision 1.31  2004/11/22 15:18:13  camacho
01518 * + tests & mock object for purge stage of PSSM creation
01519 *
01520 * Revision 1.30  2004/11/02 21:27:22  camacho
01521 * Fixes for recent changes in PSI-BLAST function names
01522 *
01523 * Revision 1.29  2004/10/18 14:51:49  camacho
01524 * Added argument to _PSIComputeSequenceWeights
01525 *
01526 * Revision 1.28  2004/10/13 20:49:22  camacho
01527 * + support for requesting diagnostics information and specifying underlying matrix
01528 *
01529 * Revision 1.27  2004/10/13 15:46:23  camacho
01530 * + tests for invalid PSSM data
01531 *
01532 * Revision 1.26  2004/10/13 01:43:54  camacho
01533 * + unit test for checking 0-length queries
01534 *
01535 * Revision 1.25  2004/10/12 21:27:49  camacho
01536 * + mock objects to simulate bad pssm input data
01537 *
01538 * Revision 1.24  2004/10/12 14:19:36  camacho
01539 * Update for scoremat.asn reorganization
01540 *
01541 * Revision 1.23  2004/08/31 16:10:07  camacho
01542 * Use CppUnit assertions for floating point values
01543 *
01544 * Revision 1.22  2004/08/05 19:20:27  camacho
01545 * Temporarily disable failing test
01546 *
01547 * Revision 1.21  2004/08/04 21:20:55  camacho
01548 * Change seq-align file
01549 *
01550 * Revision 1.20  2004/08/04 20:28:49  camacho
01551 * Updated to reflect recent changes in core PSSM engine structures
01552 *
01553 * Revision 1.19  2004/08/02 13:31:28  camacho
01554 * Renaming of PSSM engine structures
01555 *
01556 * Revision 1.18  2004/07/29 17:56:12  camacho
01557 * Updated to use new interfaces, needs more test data
01558 *
01559 * Revision 1.17  2004/07/22 16:37:59  camacho
01560 * Fixes for exchanging data loaders
01561 *
01562 * Revision 1.16  2004/07/22 13:58:59  camacho
01563 * Use the new C++ Object Manager interfaces
01564 *
01565 * Revision 1.15  2004/07/21 17:51:03  camacho
01566 * disable failing unit tests for right now
01567 *
01568 * Revision 1.14  2004/07/07 18:55:38  camacho
01569 * Add test for handling out-of-memory conditions
01570 *
01571 * Revision 1.13  2004/07/06 15:58:45  dondosha
01572 * Use EBlastProgramType enumeration type for program when calling C functions
01573 *
01574 * Revision 1.12  2004/07/02 18:02:54  camacho
01575 * Added more tests for purging matching sequences and sequence weights
01576 * computation.
01577 *
01578 * Revision 1.11  2004/06/22 16:46:19  camacho
01579 * Changed the blast_type_* definitions for the EBlastProgramType enumeration.
01580 *
01581 * Revision 1.10  2004/06/21 15:51:34  camacho
01582 * Added compute extents tests, fixed memory leaks
01583 *
01584 * Revision 1.9  2004/06/18 15:05:34  camacho
01585 * Added more comparison tests
01586 *
01587 * Revision 1.8  2004/06/16 15:23:48  camacho
01588 * Added posPurgeMatches unit tests
01589 *
01590 * Revision 1.7  2004/06/16 12:48:26  camacho
01591 * Fix compiler warnings
01592 *
01593 * Revision 1.6  2004/06/16 12:12:47  camacho
01594 * Remove extra comma in enumerated type
01595 *
01596 * Revision 1.5  2004/06/14 21:33:49  camacho
01597 * Refactored test code to use a pssm engine mock object
01598 *
01599 * Revision 1.4  2004/06/09 21:34:20  camacho
01600 * Minor changes
01601 *
01602 * Revision 1.3  2004/06/09 16:45:17  camacho
01603 * Fix for solaris build
01604 *
01605 * Revision 1.2  2004/06/09 16:17:29  camacho
01606 * Minor fixes
01607 *
01608 * Revision 1.1  2004/06/09 14:58:55  camacho
01609 * Initial revision
01610 *
01611 *
01612 * ===========================================================================
01613 */
01614 
01615 

Generated on Sun Dec 6 22:17:41 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:51 2009 by modify_doxy.py rev. 173732