src/algo/blast/api/blast_seqalign.cpp File Reference


Detailed Description

Utility function to convert internal BLAST result structures into CSeq_align_set objects.

Definition in file blast_seqalign.cpp.

#include <ncbi_pch.hpp>
#include "blast_seqalign.hpp"
#include <algo/blast/api/blast_aux.hpp>
#include <algo/blast/api/query_data.hpp>
#include <objects/seqloc/Seq_loc.hpp>
#include <objects/seqloc/Seq_interval.hpp>
#include <objects/seqalign/seqalign__.hpp>
#include <objects/general/Object_id.hpp>
#include <serial/iterator.hpp>
#include <objmgr/util/seq_align_util.hpp>
#include <algorithm>

Include dependency graph for blast_seqalign.cpp:

Go to the source code of this file.

Defines

#define SMALLEST_EVALUE   1.0e-180
 Threshold below which e-values are saved as 0.
#define GAP_VALUE   -1
 Value in the Dense-seg indicating a gap.

Functions

 USING_SCOPE (objects)
static ENa_strand s_Frame2Strand (short frame)
 Converts a frame into the appropriate strand.
static int s_GetCurrPos (int &pos, int pos2advance)
 Advances position in a sequence, according to an edit script instruction.
static TSeqPos s_GetAlignmentStart (int &curr_pos, int num, ENa_strand strand, bool translate, int length, int original_length, short frame)
 Finds the starting position of a sequence segment in an alignment, given an editing script.
static Int4 s_GetProteinFrameLength (Int4 nuc_length, Int2 frame)
 Finds length of a protein frame given a nucleotide length and a frame number.
static void s_CollectSeqAlignData (const BlastHSP *hsp, const GapEditScript *esp, unsigned int first, unsigned int nsegs, vector< TSignedSeqPos > &starts, vector< TSeqPos > &lengths, vector< ENa_strand > &strands, Int4 query_length, Int4 subject_length, bool translate1, bool translate2)
 Fills vectors of start positions, lengths and strands for all alignment segments.
static void s_CreateDenseg (CDense_seg &dense_seg, CRef< CSeq_id > master, CRef< CSeq_id > slave, vector< TSignedSeqPos > &starts, vector< TSeqPos > &lengths, vector< ENa_strand > &strands)
 Creates a Dense-seg object from the starts, lengths and strands vectors and two Seq-ids.
static CSeq_align::C_Segs::TStd s_CreateStdSegs (CRef< CSeq_id > master, CRef< CSeq_id > slave, vector< TSignedSeqPos > &starts, vector< TSeqPos > &lengths, vector< ENa_strand > &strands, bool translate_master, bool translate_slave)
 Creates a Std-seg object from the starts, lengths and strands vectors and two Seq-ids for a translated search.
static void s_CorrectUASequence (BlastHSP *hsp)
 Checks if any decline-to-align segments immediately follow an insertion or deletion, and swaps any such segments so indels are always to the right of the decline-to-align segments.
static CRef< CSeq_align > s_CreateSeqAlign (CRef< CSeq_id > master, CRef< CSeq_id > slave, vector< TSignedSeqPos > starts, vector< TSeqPos > lengths, vector< ENa_strand > strands, bool translate_master, bool translate_slave)
 Creates a Seq-align for a single HSP from precalculated vectors of start positions, lengths and strands of segments, sequence identifiers and other information.
static CRef< CSeq_align > s_BlastHSP2SeqAlign (EBlastProgramType program, BlastHSP *hsp, CRef< CSeq_id > id1, CRef< CSeq_id > id2, Int4 query_length, Int4 subject_length)
 Converts a traceback editing block to a Seq-align, provided the 2 sequence identifiers.
static CRef< CSeq_align > s_OOFBlastHSP2SeqAlign (EBlastProgramType program, BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length)
 This function is used for out-of-frame traceback conversion. Converts an OOF editing script chain to a Seq-align of type Std-seg.
static CRef< CScore > s_MakeScore (const string &ident_string, double d=0.0, int i=0)
 Creates and initializes CScore with a given name, and with integer or double value.
static size_t s_CalculateScoreVectorSize (const BlastHSP *hsp, const vector< int > &gi_list)
 Computes the exact size of a CSeq_align::TScore for a given HSP.
static void s_BuildScoreList (const BlastHSP *hsp, CSeq_align::TScore &scores, const vector< int > &gi_list)
 Creates a list of score objects for a Seq-align, given an HSP structure.
static void s_AddScoresToSeqAlign (CRef< CSeq_align > &seqalign, const BlastHSP *hsp, const vector< int > &gi_list)
 Given an HSP structure, creates a list of scores and inserts them into a Seq-align.
CRef< CDense_diag > x_UngappedHSPToDenseDiag (BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< int > &gi_list)
 Creates a Dense-diag object from HSP information and sequence identifiers for a non-translated ungapped search.
CRef< CStd_seg > x_UngappedHSPToStdSeg (BlastHSP *hsp, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< int > &gi_list)
 Creates a Std-seg object from HSP information and sequence identifiers for a translated ungapped search.
void BLASTUngappedHspListToSeqAlign (EBlastProgramType program, BlastHSPList *hsp_list, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, const vector< int > &gi_list, vector< CRef< CSeq_align > > &sa_vector)
 Creates a Seq-align from an HSP list for an ungapped search.
void BLASTHspListToSeqAlign (EBlastProgramType program, BlastHSPList *hsp_list, CRef< CSeq_id > query_id, CRef< CSeq_id > subject_id, Int4 query_length, Int4 subject_length, bool is_ooframe, const vector< int > &gi_list, vector< CRef< CSeq_align > > &sa_vector)
 This is called for each query and each subject in a BLAST search.
static CRef< CSeq_align_set > s_CreateEmptySeq_align_set ()
void RemapToQueryLoc (CRef< CSeq_align > sar, const CSeq_loc &query)
 Remaps Seq-align offsets relative to the query Seq-loc.
static void s_RemapToSubjectLoc (CRef< CSeq_align > &subj_aligns, const CSeq_loc &subj_loc)
 Remap subject alignment if its location specified the reverse strand or a starting location other than the beginning of the sequence.
CRef< CSeq_align_set > BlastHitList2SeqAlign_OMF (const BlastHitList *hit_list, EBlastProgramType prog, const CSeq_loc &query_loc, TSeqPos query_length, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, TSeqLocInfoVector &subj_masks)
TSeqAlignVector PhiBlastResults2SeqAlign_OMF (const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query, const IBlastSeqInfoSrc *seqinfo_src, const SPHIQueryInfo *pattern_info, vector< TSeqLocInfoVector > &subj_masks)
static void s_AdjustNegativeSubjFrameInBlastn (ENa_strand subj_strand, EBlastProgramType program, BlastHSPList *hsp_list)
 This function changes the subject frame for HSPs if the program is blastn and the subject was specified with a negative strand.
static TSeqAlignVector s_BLAST_OneSubjectResults2CSeqAlign (const BlastHSPResults *results, ILocalQueryData &query_data, const IBlastSeqInfoSrc &seqinfo_src, EBlastProgramType prog, Uint4 subj_idx, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
 Extracts results from the BlastHSPResults structure for only one subject sequence, identified by its index, and converts them into a vector of CSeq_align_set objects.
static TSeqAlignVector s_TransposeSeqAlignVector (const TSeqAlignVector &alnvec, const size_t num_queries, const size_t num_subjects)
 Transpose the (linearly organized) seqalign set matrix from (q1 s1 q2 s1 ... qN s1, ..., q1 sM q2 sM ... qN sM) to (q1 s1 q1 s2 ... q1 sM, ..., qN s1 qN s2 ... qN sM).
static TSeqAlignVector s_BlastResults2SeqAlignSequenceCmp_OMF (const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query_data, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
static TSeqAlignVector s_BlastResults2SeqAlignDatabaseSearch_OMF (const BlastHSPResults *results, EBlastProgramType prog, class ILocalQueryData &query, const IBlastSeqInfoSrc *seqinfo_src, bool is_gapped, bool is_ooframe, vector< TSeqLocInfoVector > &subj_masks)
TSeqAlignVector LocalBlastResults2SeqAlign (BlastHSPResults *hsp_results, ILocalQueryData &local_data, const IBlastSeqInfoSrc &seqinfo_src, EBlastProgramType program, bool gapped, bool oof_mode, vector< TSeqLocInfoVector > &subj_masks, EResultType result_type=eDatabaseSearch)
 Convert traceback output into Seq-align format.

Variables

static const TSeqPos kBlastAlignmentDim = 2
 BLAST alignments always have 2 dimensions (i.e., 2 sequences).


Generated on Mon Dec 7 06:54:01 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:21:37 2009 by modify_doxy.py rev. 173732