00001 #ifndef ALGO_ALIGN_CONTIG_ASSEMBLY___CONTIG_ASSEMBLY__HPP
00002 #define ALGO_ALIGN_CONTIG_ASSEMBLY___CONTIG_ASSEMBLY__HPP
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include <objects/seqloc/Seq_id.hpp>
00035 #include <objmgr/scope.hpp>
00036 #include <objects/seqloc/Na_strand_.hpp>
00037 #include <util/range.hpp>
00038
00039 BEGIN_NCBI_SCOPE
00040
00041 BEGIN_objects_SCOPE
00042 class CSeq_id;
00043 class CSeq_align;
00044 class CSeq_align_set;
00045 class CDense_seg;
00046 class CScope;
00047 class CAlnVec;
00048 END_objects_SCOPE
00049
00050
00051
00052
00053 class CContigAssembly
00054 {
00055 public:
00056 class CAlnStats : public CObject
00057 {
00058 public:
00059 CAlnStats(unsigned int adjusted_len,
00060 unsigned int mm,
00061 unsigned int gaps) :
00062 m_AdjustedLen(adjusted_len), m_MM(mm), m_Gaps(gaps), m_PctIdent(0.0) {}
00063 CAlnStats(const objects::CDense_seg& ds, objects::CScope& scope);
00064
00065 unsigned int GetAdjustedLength() const {return m_AdjustedLen;}
00066
00067 double GetFracIdentity() const { return m_PctIdent; }
00068 unsigned int GetNumMismatches() const {return m_MM;}
00069 unsigned int GetNumGaps() const {return m_Gaps;}
00070 private:
00071 unsigned int m_AdjustedLen;
00072 unsigned int m_MM;
00073 unsigned int m_Gaps;
00074 double m_PctIdent;
00075 };
00076
00077
00078
00079
00080
00081 static vector<CRef<objects::CSeq_align> >
00082 Align(const objects::CSeq_id& id0, const objects::CSeq_id& id1,
00083 const string& blast_params, double min_ident,
00084 unsigned int max_end_slop, objects::CScope& scope,
00085 CNcbiOstream* ostr = 0,
00086 const vector<unsigned int>& band_halfwidths
00087 = vector<unsigned int>(1, 200),
00088 unsigned int diag_finding_window = 200,
00089 unsigned int min_align_length = 50,
00090 objects::ENa_strand strand0 = objects::eNa_strand_unknown,
00091 objects::ENa_strand strand1 = objects::eNa_strand_unknown);
00092
00093
00094
00095
00096
00097 static CRef<objects::CSeq_align_set>
00098 Blastn(const objects::CSeq_id& query_id,
00099 const objects::CSeq_id& subject_id,
00100 const string& param_string, objects::CScope& scope);
00101
00102 static CRef<objects::CSeq_align_set>
00103 Blastn(const objects::CSeq_loc& query_loc,
00104 const objects::CSeq_loc& subject_loc,
00105 const string& param_string, objects::CScope& scope);
00106
00107
00108
00109 static void FindDiagFromAlignSet(const objects::CSeq_align_set& align_set,
00110 objects::CScope& scope,
00111 unsigned int window_size,
00112 objects::ENa_strand& strand,
00113 unsigned int& diag);
00114
00115
00116
00117 static CRef<objects::CDense_seg>
00118 BandedGlobalAlignment(const objects::CSeq_id& id0,
00119 const objects::CSeq_id& id1,
00120 objects::ENa_strand strand,
00121 unsigned int diag,
00122 unsigned int half_width,
00123 objects::CScope& scope);
00124
00125
00126
00127
00128 static CRef<objects::CDense_seg>
00129 BestLocalSubAlignment(const objects::CDense_seg& ds_in,
00130 objects::CScope& scope);
00131
00132
00133
00134 static void DiagCounts(const objects::CSeq_align_set& align_set,
00135 objects::CScope& scope,
00136 vector<unsigned int>& plus_vec,
00137 vector<unsigned int>& minus_vec);
00138
00139 typedef map<unsigned int, unsigned int> TDiagMap;
00140 static void DiagCounts(const objects::CSeq_align_set& align_set,
00141 objects::CScope& scope,
00142 TDiagMap& plus_map,
00143 TDiagMap& minus_map);
00144
00145
00146
00147 typedef CRange<unsigned int> TRange;
00148 static void FindMaxRange(const vector<unsigned int>& vec,
00149 unsigned int window,
00150 unsigned int& max,
00151 vector<TRange>& max_range);
00152
00153
00154 static void FindMaxRange(const TDiagMap& map,
00155 unsigned int window,
00156 unsigned int& max,
00157 vector<TRange>& max_range);
00158
00159
00160 static bool IsDovetail(const objects::CDense_seg& ds,
00161 unsigned int slop, objects::CScope& scope);
00162 static bool IsAtLeastHalfDovetail(const objects::CDense_seg& ds,
00163 unsigned int slop,
00164 objects::CScope& scope);
00165 static bool IsContained(const objects::CDense_seg& ds,
00166 unsigned int slop, objects::CScope& scope);
00167 static double FracIdent(const objects::CDense_seg& ds,
00168 objects::CScope& scope);
00169
00170
00171
00172
00173 struct SAlignStats {
00174
00175
00176 struct STails {
00177 TSeqPos left;
00178 TSeqPos right;
00179 };
00180
00181
00182 SAlignStats()
00183 : total_length(0),
00184 aligned_length(0),
00185 gap_count(0),
00186 mismatches(0),
00187 pct_identity(0)
00188 {
00189 }
00190
00191
00192 TSeqPos total_length;
00193
00194
00195 TSeqPos aligned_length;
00196
00197
00198 TSeqPos gap_count;
00199
00200
00201 TSeqPos mismatches;
00202
00203
00204 double pct_identity;
00205
00206
00207 vector<STails> tails;
00208
00209
00210 vector<TSeqPos> gaps;
00211
00212
00213 vector<bool> is_simple;
00214 };
00215 static void GatherAlignStats(const objects::CAlnVec& vec,
00216 SAlignStats& align_stats);
00217 static void GatherAlignStats(const objects::CDense_seg& ds,
00218 objects::CScope& scope,
00219 SAlignStats& align_stats);
00220 static void GatherAlignStats(const objects::CSeq_align& aln,
00221 objects::CScope& scope,
00222 SAlignStats& align_stats);
00223
00224 private:
00225 static void x_OrientAlign(objects::CDense_seg& ds, objects::CScope& scope);
00226 static bool x_IsAllowedStrands(const objects::CDense_seg& ds,
00227 objects::ENa_strand strand0,
00228 objects::ENa_strand strand1);
00229 static TSeqPos x_DensegLength(const objects::CDense_seg& ds);
00230
00231 static void x_GatherIdentStats(const objects::CAlnVec& vec,
00232 SAlignStats& align_stats);
00233 };
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243 END_NCBI_SCOPE
00244
00245 #endif // ALGO_ALIGN_CONTIG_ASSEMBLY___CONTIG_ASSEMBLY__HPP
00246
00247
00248