00001 #ifndef ALGO_ALIGN_PROSPLIGN__HPP
00002 #define ALGO_ALIGN_PROSPLIGN__HPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 #include <corelib/ncbistd.hpp>
00039 #include <corelib/ncbiargs.hpp>
00040 #include <corelib/ncbiobj.hpp>
00041 #include <objects/seqalign/seqalign__.hpp>
00042 #include <objmgr/seq_vector_ci.hpp>
00043
00044 #include <list>
00045
00046 BEGIN_NCBI_SCOPE
00047
00048 BEGIN_SCOPE(objects)
00049 class CScope;
00050 END_SCOPE(objects)
00051
00052
00053 class CProSplignOptions_Base: public CObject
00054 {
00055 public:
00056 static void SetupArgDescriptions(CArgDescriptions* argdescr);
00057
00058
00059 CProSplignOptions_Base();
00060 CProSplignOptions_Base(const CArgs& args);
00061
00062 CProSplignOptions_Base& SetScoreMatrix(const string& matrix_name);
00063 const string& GetScoreMatrix() const;
00064
00065 static const string default_score_matrix_name;
00066
00067 private:
00068 string score_matrix_name;
00069 };
00070
00071 class CProSplignScoring: public CProSplignOptions_Base
00072 {
00073 public:
00074 static void SetupArgDescriptions(CArgDescriptions* argdescr);
00075
00076
00077 CProSplignScoring();
00078
00079 CProSplignScoring(const CArgs& args);
00080
00081
00082 CProSplignScoring& SetMinIntronLen(int);
00083 int GetMinIntronLen() const;
00084
00085
00086
00087
00088 CProSplignScoring& SetGapOpeningCost(int);
00089 int GetGapOpeningCost() const;
00090
00091
00092 CProSplignScoring& SetGapExtensionCost(int);
00093 int GetGapExtensionCost() const;
00094
00095 CProSplignScoring& SetFrameshiftOpeningCost(int);
00096 int GetFrameshiftOpeningCost() const;
00097
00098
00099 CProSplignScoring& SetGTIntronCost(int);
00100 int GetGTIntronCost() const;
00101
00102 CProSplignScoring& SetGCIntronCost(int);
00103 int GetGCIntronCost() const;
00104
00105 CProSplignScoring& SetATIntronCost(int);
00106 int GetATIntronCost() const;
00107
00108
00109
00110
00111 CProSplignScoring& SetNonConsensusIntronCost(int);
00112 int GetNonConsensusIntronCost() const;
00113
00114
00115
00116 CProSplignScoring& SetInvertedIntronExtensionCost(int);
00117 int GetInvertedIntronExtensionCost() const;
00118
00119 public:
00120 static const int default_min_intron_len = 30;
00121
00122 static const int default_gap_opening = 10;
00123 static const int default_gap_extension = 1;
00124 static const int default_frameshift_opening = 30;
00125
00126 static const int default_intron_GT = 15;
00127 static const int default_intron_GC = 20;
00128 static const int default_intron_AT = 25;
00129 static const int default_intron_non_consensus = 34;
00130 static const int default_inverted_intron_extension = 1000;
00131
00132 private:
00133 int min_intron_len;
00134 int gap_opening;
00135 int gap_extension;
00136 int frameshift_opening;
00137 int intron_GT;
00138 int intron_GC;
00139 int intron_AT;
00140 int intron_non_consensus;
00141 int inverted_intron_extension;
00142 };
00143
00144
00145
00146
00147
00148
00149
00150 class CProSplignOutputOptions: public CProSplignOptions_Base
00151 {
00152 public:
00153 enum EMode {
00154
00155 eWithHoles,
00156
00157 ePassThrough,
00158 };
00159
00160 static void SetupArgDescriptions(CArgDescriptions* argdescr);
00161
00162 CProSplignOutputOptions(EMode mode = eWithHoles);
00163 CProSplignOutputOptions(const CArgs& args);
00164
00165 bool IsPassThrough() const;
00166
00167
00168
00169 CProSplignOutputOptions& SetEatGaps(bool);
00170 bool GetEatGaps() const;
00171
00172
00173 CProSplignOutputOptions& SetFlankPositives(int);
00174 int GetFlankPositives() const;
00175
00176 CProSplignOutputOptions& SetTotalPositives(int);
00177 int GetTotalPositives() const;
00178
00179
00180 CProSplignOutputOptions& SetMaxBadLen(int);
00181 int GetMaxBadLen() const;
00182 CProSplignOutputOptions& SetMinPositives(int);
00183 int GetMinPositives() const;
00184
00185
00186 CProSplignOutputOptions& SetMinExonId(int);
00187 int GetMinExonId() const;
00188
00189 CProSplignOutputOptions& SetMinExonPos(int);
00190 int GetMinExonPos() const;
00191
00192
00193 CProSplignOutputOptions& SetMinFlankingExonLen(int);
00194 int GetMinFlankingExonLen() const;
00195
00196 CProSplignOutputOptions& SetMinGoodLen(int);
00197 int GetMinGoodLen() const;
00198
00199
00200
00201
00202 CProSplignOutputOptions& SetStartBonus(int);
00203 int GetStartBonus() const;
00204
00205 CProSplignOutputOptions& SetStopBonus(int);
00206 int GetStopBonus() const;
00207
00208 public:
00209 static const bool default_eat_gaps = true;
00210
00211 static const int default_flank_positives = 55;
00212 static const int default_total_positives = 70;
00213
00214 static const int default_max_bad_len = 45;
00215 static const int default_min_positives = 15;
00216
00217 static const int default_min_exon_id = 30;
00218 static const int default_min_exon_pos = 55;
00219
00220 static const int default_min_flanking_exon_len = 15;
00221 static const int default_min_good_len = 59;
00222
00223 static const int default_start_bonus = 8;
00224 static const int default_stop_bonus = 8;
00225
00226 private:
00227 bool eat_gaps;
00228 int flank_positives;
00229 int total_positives;
00230 int max_bad_len;
00231 int min_positives;
00232 int min_exon_id;
00233 int min_exon_pos;
00234 int min_flanking_exon_len;
00235 int min_good_len;
00236 int start_bonus;
00237 int stop_bonus;
00238 };
00239
00240 class CProSplignText;
00241
00242
00243
00244 class CProSplign: public CObject
00245 {
00246 public:
00247
00248
00249
00250 CProSplign( CProSplignScoring scoring = CProSplignScoring(), bool intronless=false );
00251 ~CProSplign();
00252
00253
00254
00255
00256
00257 CRef<objects::CSeq_align>
00258 FindAlignment(objects::CScope& scope,
00259 const objects::CSeq_id& protein,
00260 const objects::CSeq_loc& genomic,
00261 CProSplignOutputOptions output_options = CProSplignOutputOptions())
00262 {
00263 CRef<objects::CSeq_align> align_ref;
00264 align_ref = FindGlobalAlignment(scope, protein, genomic);
00265 align_ref = RefineAlignment(scope, *align_ref, output_options);
00266 return align_ref;
00267 }
00268
00269
00270
00271
00272
00273 CRef<objects::CSeq_align>
00274 FindGlobalAlignment(objects::CScope& scope,
00275 const objects::CSeq_id& protein,
00276 const objects::CSeq_loc& genomic);
00277
00278
00279
00280 CRef<objects::CSeq_align>
00281 RefineAlignment(objects::CScope& scope,
00282 const objects::CSeq_align& seq_align,
00283 CProSplignOutputOptions output_options = CProSplignOutputOptions());
00284
00285
00286 CProSplign( CProSplignScoring scoring, bool intronless, bool one_stage, bool just_second_stage, bool old);
00287 const vector<pair<int, int> >& GetExons() const;
00288 vector<pair<int, int> >& SetExons();
00289 void GetFlanks(bool& lgap, bool& rgap) const;
00290 void SetFlanks(bool lgap, bool rgap);
00291
00292 public:
00293 class CImplementation;
00294 private:
00295 auto_ptr<CImplementation> m_implementation;
00296
00297 private:
00298
00299 CProSplign(const CProSplign&);
00300 CProSplign& operator=(const CProSplign&);
00301 };
00302
00303 BEGIN_SCOPE(prosplign)
00304 class CSubstMatrix;
00305 class CTranslationTable;
00306 END_SCOPE(prosplign)
00307
00308
00309
00310
00311
00312
00313
00314
00315 class CProSplignText {
00316 public:
00317
00318 static void Output(const objects::CSeq_align& seqalign, objects::CScope& scope, ostream& out, int width, const string& matrix_name = "BLOSUM62");
00319
00320 CProSplignText(objects::CScope& scope, const objects::CSeq_align& seqalign, const string& matrix_name = "BLOSUM62");
00321 ~CProSplignText();
00322
00323 const string& GetDNA() { return m_dna; }
00324 const string& GetTranslation() { return m_translation; }
00325 const string& GetMatch() { return m_match; }
00326 const string& GetProtein() { return m_protein; }
00327
00328 private:
00329 string m_dna;
00330 string m_translation;
00331 string m_match;
00332 string m_protein;
00333 auto_ptr<prosplign::CSubstMatrix> m_matrix;
00334 CRef<prosplign::CTranslationTable> m_trans_table;
00335
00336 void AddDNAText(objects::CSeqVector_CI& genomic_ci, int& nuc_prev, size_t len);
00337 void TranslateDNA(int phase, size_t len, bool is_insertion);
00338 void AddProtText(objects::CSeqVector_CI& protein_ci, int& prot_prev, size_t len);
00339 void MatchText(size_t len, bool is_match=false);
00340 char MatchChar(size_t i);
00341 void AddHoleText(bool prev_3_prime_splice, bool cur_5_prime_splice,
00342 objects::CSeqVector_CI& genomic_ci, objects::CSeqVector_CI& protein_ci,
00343 int& nuc_prev, int& prot_prev,
00344 int nuc_cur_start, int prot_cur_start);
00345 void AddSpliceText(objects::CSeqVector_CI& genomic_ci, int& nuc_prev, char match);
00346 };
00347
00348 END_NCBI_SCOPE
00349
00350
00351 #endif
00352
00353