Definition in file blast_util.c.
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_filter.h>
#include <algo/blast/core/blast_stat.h>
Include dependency graph for blast_util.c:

Go to the source code of this file.
Functions | |
| void | __sfree (void **x) |
| Implemented in blast_util.c. | |
| SSeqRange | SSeqRangeNew (Int4 start, Int4 stop) |
| Create a new SSeqRange structure with both fields initialized. | |
| Int4 | SSeqRangeArrayLessThanOrEqual (const SSeqRange *ranges, Int4 num_ranges, Int4 target) |
| Returns the index of the range, such that this element is the first range that either contains the target or if no such range exists, the index of the first range, such that the target is less than this range. | |
| static void | s_BlastSequenceBlkFreeSeqRanges (BLAST_SequenceBlk *seq_blk) |
| Auxiliary function to free the BLAST_SequenceBlk::seq_ranges field if applicable. | |
| Int2 | BlastSetUp_SeqBlkNew (const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated) |
| Allocates memory for *sequence_blk and then populates it. | |
| Int2 | BlastSeqBlkNew (BLAST_SequenceBlk **retval) |
| Allocates a new sequence block structure. | |
| Int2 | BlastSeqBlkSetSequence (BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen) |
| Stores the sequence in the sequence block structure. | |
| Int2 | BlastSeqBlkSetCompressedSequence (BLAST_SequenceBlk *seq_blk, const Uint1 *sequence) |
| Stores the compressed nucleotide sequence in the sequence block structure for the subject sequence when BLASTing 2 sequences. | |
| Int2 | BlastSeqBlkSetSeqRanges (BLAST_SequenceBlk *seq_blk, SSeqRange *seq_ranges, Uint4 num_seq_ranges, Boolean copy_seq_ranges) |
| Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure. | |
| void | BlastSequenceBlkClean (BLAST_SequenceBlk *seq_blk) |
| Deallocate memory only for the sequence in the sequence block. | |
| BLAST_SequenceBlk * | BlastSequenceBlkFree (BLAST_SequenceBlk *seq_blk) |
| Deallocate memory for a sequence block. | |
| void | BlastSequenceBlkCopy (BLAST_SequenceBlk **copy, BLAST_SequenceBlk *src) |
| Copies contents of the source sequence block without copying sequence buffers; sets all "field_allocated" booleans to FALSE, to make sure fields are not freed on the call to BlastSequenceBlkFree. | |
| Int2 | BlastProgram2Number (const char *program, EBlastProgramType *number) |
| Set number for a given program type. | |
| Int2 | BlastNumber2Program (EBlastProgramType number, char **program) |
| Return string name for program given a number. | |
| static Uint1 | s_CodonToAA (Uint1 *codon, const Uint1 *codes) |
| Translate 3 nucleotides into an amino acid. MUST have 'X' as unknown amino acid. | |
| Int4 | BLAST_GetTranslation (const Uint1 *query_seq, const Uint1 *query_seq_rev, Int4 nt_length, Int2 frame, Uint1 *prot_seq, const Uint1 *genetic_code) |
| Get the translation of the nucleotide sequence in the appropriate frame and with the appropriate genetic code. | |
| Int2 | BlastCompressBlastnaSequence (BLAST_SequenceBlk *seq_blk) |
| Adds a specialized representation of sequence data to a sequence block. | |
| Int4 | BLAST_TranslateCompressedSequence (Uint1 *translation, Int4 length, const Uint1 *nt_seq, Int2 frame, Uint1 *prot_seq) |
| Translate a nucleotide sequence without ambiguity codes. | |
| Int2 | GetReverseNuclSequence (const Uint1 *sequence, Int4 length, Uint1 **rev_sequence_ptr) |
| Reverse a nucleotide sequence in the blastna encoding, adding sentinel bytes on both ends. | |
| Int1 | BLAST_ContextToFrame (EBlastProgramType prog_number, Uint4 context_number) |
| This function translates the context number of a context into the frame of the sequence. | |
| Int2 | BLAST_PackDNA (const Uint1 *buffer, Int4 length, EBlastEncoding encoding, Uint1 **packed_seq) |
| Convert a sequence in ncbi4na or blastna encoding into a packed sequence in ncbi2na encoding. | |
| size_t | BLAST_GetTranslatedProteinLength (size_t nucleotide_length, unsigned int context) |
| Calculates the length of frame for a translated protein. | |
| Int2 | BLAST_CreateMixedFrameDNATranslation (BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info) |
| Initialize the mixed-frame sequence for out-of-frame gapped extension. | |
| static Uint1 * | s_BlastGetTranslationTable (const Uint1 *genetic_code, Boolean reverse_complement) |
| Gets the translation array for a given genetic code. | |
| Int2 | BLAST_GetAllTranslations (const Uint1 *nucl_seq, EBlastEncoding encoding, Int4 nucl_length, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 **frame_offsets_ptr, Uint1 **mixed_seq_ptr) |
| Translate nucleotide into 6 frames. | |
| int | Blast_GetPartialTranslation (const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr) |
| Get one frame translation - needed when only parts of subject sequences are translated. | |
| Int4 | BLAST_FrameToContext (Int2 frame, EBlastProgramType program) |
| Convert translation frame or strand into a context number suitable for indexing into the BlastQueryInfo::contexts array. | |
| Int4 | BSearchInt4 (Int4 n, Int4 *A, Int4 size) |
| The following binary search routine assumes that array A is filled. | |
| SBlastTargetTranslation * | BlastTargetTranslationFree (SBlastTargetTranslation *target_t) |
| Free an SBlastTargetTranslation object (target_t: object to be freed [in]). | |
| Int2 | BlastTargetTranslationNew (BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target) |
| Sets up structure for target translation. | |
| double * | BLAST_GetStandardAaProbabilities () |
| Get the standard amino acid probabilities. | |
| char * | BLAST_StrToUpper (const char *string) |
| Returns a copy of the input string with all its characters turned to uppercase. | |
| unsigned int | BLAST_GetNumberOfContexts (EBlastProgramType p) |
| Get the number of contexts for a given program. | |
| SBlastProgress * | SBlastProgressNew (void *user_data) |
| Allocates and initializes a new SBlastProgress structure. | |
| SBlastProgress * | SBlastProgressFree (SBlastProgress *progress_info) |
| Deallocates a SBlastProgress structure. | |
| void | SBlastProgressReset (SBlastProgress *progress_info) |
| Resets the progress structure to its original state (as if newly allocated) for a fresh start without touching the user_data field. | |
Variables | |
| static char const | rcsid [] |
|
|
Implemented in blast_util.c.
Definition at line 45 of file blast_util.c. References free(). |
|
||||||||||||
|
This function translates the context number of a context into the frame of the sequence.
Definition at line 813 of file blast_util.c. References Blast_QueryIsProtein(), eBlastTypeBlastn, eBlastTypeBlastx, eBlastTypePhiBlastn, eBlastTypeRpsTblastn, eBlastTypeTblastx, INT1_MAX, NUM_FRAMES, and NUM_STRANDS. Referenced by BLAST_GetAllTranslations(), BlastMaskLocDNAToProtein(), BlastMaskLocProteinToDNA(), BlastQueryInfoNew(), BlastTargetTranslationNew(), BOOST_AUTO_TEST_CASE(), OffsetArrayToContextOffsets(), s_AddMask(), and s_BlastSearchEngineCore(). |
|
||||||||||||
|
Initialize the mixed-frame sequence for out-of-frame gapped extension.
Definition at line 905 of file blast_util.c. References buffer, CODON_LENGTH, BlastQueryInfo::contexts, BlastQueryInfo::last_context, malloc(), NULLB, BlastContextInfo::query_length, BlastContextInfo::query_offset, and QueryInfo_GetSeqBufLen(). Referenced by BLAST_MainSetUp(), and BOOST_AUTO_TEST_CASE(). |
|
||||||||||||
|
Convert translation frame or strand into a context number suitable for indexing into the BlastQueryInfo::contexts array.
Definition at line 1174 of file blast_util.c. References ASSERT, Blast_QueryIsNucleotide(), Blast_QueryIsTranslated(), Blast_SubjectIsNucleotide(), and Blast_SubjectIsTranslated(). Referenced by BLAST_GetGappedScore(), Blast_HSPGetTargetTranslation(), BLAST_SmithWatermanGetGappedScore(), BlastRPSWordFinder(), and s_BlastHSPListRPSUpdate(). |
|
||||||||||||||||||||||||||||||||
|
Translate nucleotide into 6 frames. All frames are put into a translation buffer, with sentinel NULLB bytes in between. Array of offsets into the translation buffer is also returned. For out-of-frame gapping option, a mixed frame sequence is created.
Definition at line 1016 of file blast_util.c. References BLAST_ContextToFrame(), BLAST_TranslateCompressedSequence(), eBlastEncodingNcbi2na, eBlastEncodingNcbi4na, eBlastTypeBlastx, FALSE, GetReverseNuclSequence(), malloc(), NUM_FRAMES, s_BlastGetTranslationTable(), and TRUE. Referenced by BlastTargetTranslationNew(), BOOST_AUTO_TEST_CASE(), and s_BlastSearchEngineCore(). |
|
|
Get the number of contexts for a given program. This corresponds to the number of translation frames or strands whenever applicable.
Definition at line 1336 of file blast_util.c. References Blast_ProgramIsValid(), Blast_QueryIsNucleotide(), Blast_QueryIsTranslated(), NUM_FRAMES, and NUM_STRANDS. Referenced by BlastHSPStreamMerge(), BlastQueryInfoGetEffSearchSpace(), BlastQueryInfoGetQueryLength(), BlastQueryInfoNew(), BlastQueryInfoSetEffSearchSpace(), BlastSetUp_GetFilteringLocations(), CBlastAncillaryData::CBlastAncillaryData(), GetNumberOfContexts(), and CBlastQueryFilteredFrames::GetNumFrames(). |
|
||||||||||||||||||||||||||||||||
|
Get one frame translation - needed when only parts of subject sequences are translated.
Definition at line 1110 of file blast_util.c. References BLAST_GetTranslation(), CODON_LENGTH, GetReverseNuclSequence(), and malloc(). Referenced by Blast_HSPGetPartialSubjectTranslation(), and s_SequenceGetTranslatedRange(). |
|
|
Get the standard amino acid probabilities. This is basically a wrapper for BlastScoreBlkNew() and Blast_ResFreqStdComp() from blast_stat.c with a more intention-revealing name :) Caller is responsible for deallocating return value via sfree().
Definition at line 1286 of file blast_util.c. References Blast_ResFreqFree(), Blast_ResFreqNew(), Blast_ResFreqStdComp(), BLASTAA_SEQ_CODE, BLASTAA_SIZE, malloc(), Blast_ResFreq::prob, and TRUE. Referenced by _PSISequenceWeightsNew(), ColumnResidueProfile::getBackgroundResFreq(), GetStandardProbability(), Kappa_compactSearchItemsNew(), and PSICreatePssmFromFrequencyRatios(). |
|
||||||||||||
|
Calculates the length of frame for a translated protein.
Definition at line 897 of file blast_util.c. References CODON_LENGTH. |
|
||||||||||||||||||||||||||||
|
GetTranslation to get the translation of the nucl. sequence in the appropriate frame and with the appropriate GeneticCode. The function returns an allocated char*; the caller must delete this. The first and last spaces of this char* contain NULLB's.
Definition at line 409 of file blast_util.c. References ABS, CODON_LENGTH, FENCE_SENTRY, IS_residue, NULLB, and s_CodonToAA(). Referenced by Blast_GetPartialTranslation(), Blast_HSPGetTargetTranslation(), and BlastTargetTranslationNew(). |
|
||||||||||||||||||||
|
Convert a sequence in ncbi4na or blastna encoding into a packed sequence in ncbi2na encoding. Needed for 2 sequences BLASTn comparison.
Definition at line 844 of file blast_util.c. References COMPRESSION_RATIO, eBlastEncodingNucleotide, malloc(), NCBI2NA_MASK, and NCBI4NA_TO_BLASTNA. |
|
|
Returns a copy of the input string with all its characters turned to uppercase. Useful for saving score matrix names. Caller is responsible for deallocating return value.
Definition at line 1315 of file blast_util.c. Referenced by Blast_ScoreBlkMatrixInit(). |
|
||||||||||||||||||||||||
|
Translate a nucleotide sequence without ambiguity codes. This is used for the first-pass translation of the database. The genetic code to be used is determined by the translation_table. This function translates a packed (ncbi2na) nucl. alphabet. It views a basepair as being in one of four sets of 2-bits: |0|1|2|3||0|1|2|3||0|1|2|3||... 1st byte | 2nd byte | 3rd byte... A codon that starts at the beginning of the above sequence starts in state "0" and includes basepairs 0, 1, and 2. The next codon, in the same frame, after that starts in state "3" and includes 3, 0, and 1. Optimization: changed the single main loop to
Definition at line 489 of file blast_util.c. References ABS, CODON_LENGTH, and NULLB. Referenced by BLAST_GetAllTranslations(). |
|
|
Adds a specialized representation of sequence data to a sequence block. In the specialized representation, the byte at offset i packs together nucleotide bases i to i+3
Definition at line 440 of file blast_util.c. References BLAST_SequenceBlk::compressed_nuc_seq, BLAST_SequenceBlk::compressed_nuc_seq_start, len, BLAST_SequenceBlk::length, malloc(), MIN, and BLAST_SequenceBlk::sequence. |
|
||||||||||||
|
Return string name for program given a number. Return is zero on success.
Definition at line 296 of file blast_util.c. References eBlastTypeBlastn, eBlastTypeBlastp, eBlastTypeBlastx, eBlastTypePhiBlastn, eBlastTypePhiBlastp, eBlastTypePsiBlast, eBlastTypePsiTblastn, eBlastTypeRpsBlast, eBlastTypeRpsTblastn, eBlastTypeTblastn, eBlastTypeTblastx, and strdup. Referenced by Blast_ProgramNameFromType(), and CRedoAlignmentTestFixture::runRedoAlignmentCoreUnitTest(). |
|
||||||||||||
|
Set number for a given program type. Return is zero on success.
Definition at line 264 of file blast_util.c. References eBlastTypeBlastn, eBlastTypeBlastp, eBlastTypeBlastx, eBlastTypePhiBlastn, eBlastTypePhiBlastp, eBlastTypePsiBlast, eBlastTypePsiTblastn, eBlastTypeRpsBlast, eBlastTypeRpsTblastn, eBlastTypeTblastn, eBlastTypeTblastx, eBlastTypeUndefined, and strcasecmp. Referenced by NetworkProgram2BlastProgramType(). |
|
|
Allocates a new sequence block structure.
Definition at line 132 of file blast_util.c. References calloc(). Referenced by AascanTestFixture::AascanTestFixture(), BlastSetUp_SeqBlkNew(), BOOST_AUTO_TEST_CASE(), AalookupTestFixture::GetSeqBlk(), InitializeBlastScoreBlk(), s_SetupSequencesForGappedReevaluateTest(), s_SetupSequencesForUngappedReevaluateNucl(), s_SetupSequencesForUngappedReevaluateTransl(), TestFixture::SetUpQuery(), TestFixture::SetUpSubject(), SetupSubjects_OMF(), CPssmEngine::x_InitializeScoreBlock(), and CPhiblastTestFixture::x_SetupSequenceBlk(). |
|
||||||||||||
|
Stores the compressed nucleotide sequence in the sequence block structure for the subject sequence when BLASTing 2 sequences. This sequence should be encoded in eBlastEncodingNcbi2na and NOT have sentinel bytes (as this encoding doesn't allow them).
Definition at line 163 of file blast_util.c. References BLAST_SequenceBlk::oof_sequence, BLAST_SequenceBlk::sequence, BLAST_SequenceBlk::sequence_allocated, and TRUE. Referenced by TestFixture::SetUpSubject(). |
|
||||||||||||||||||||
|
Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure.
Definition at line 178 of file blast_util.c. References ASSERT, calloc(), FALSE, BLAST_SequenceBlk::num_seq_ranges, s_BlastSequenceBlkFreeSeqRanges(), BLAST_SequenceBlk::seq_ranges, BLAST_SequenceBlk::seq_ranges_allocated, and TRUE. Referenced by BOOST_AUTO_TEST_CASE(), SetupSubjects_OMF(), and TestFixture::SkipMaskedRangesCore(). |
|
||||||||||||||||
|
Stores the sequence in the sequence block structure.
Definition at line 146 of file blast_util.c. References BLAST_SequenceBlk::length, BLAST_SequenceBlk::oof_sequence, BLAST_SequenceBlk::sequence, BLAST_SequenceBlk::sequence_start, BLAST_SequenceBlk::sequence_start_allocated, and TRUE. Referenced by AascanTestFixture::AascanTestFixture(), BOOST_AUTO_TEST_CASE(), AalookupTestFixture::GetSeqBlk(), InitializeBlastScoreBlk(), s_SetupSequencesForGappedReevaluateTest(), s_SetupSequencesForUngappedReevaluateNucl(), s_SetupSequencesForUngappedReevaluateTransl(), TestFixture::SetUpQuery(), CPssmEngine::x_InitializeScoreBlock(), and CPhiblastTestFixture::x_SetupSequenceBlk(). |
|
|
Deallocate memory only for the sequence in the sequence block.
Definition at line 210 of file blast_util.c. References FALSE, BLAST_SequenceBlk::oof_sequence, BLAST_SequenceBlk::oof_sequence_allocated, s_BlastSequenceBlkFreeSeqRanges(), BLAST_SequenceBlk::sequence, BLAST_SequenceBlk::sequence_allocated, BLAST_SequenceBlk::sequence_start, BLAST_SequenceBlk::sequence_start_allocated, and sfree. Referenced by BLAST_ComputeTraceback(), and BlastSequenceBlkFree(). |
|
||||||||||||
|
Copies contents of the source sequence block without copying sequence buffers; sets all "field_allocated" booleans to FALSE, to make sure fields are not freed on the call to BlastSequenceBlkFree.
Definition at line 245 of file blast_util.c. References ASSERT, BlastMemDup(), and FALSE. Referenced by s_MultiSeqGetSequence(), and s_QueryFactoryGetSequence(). |
|
|
Deallocate memory for a sequence block.
Definition at line 231 of file blast_util.c. References BlastMaskLocFree(), BlastSequenceBlkClean(), BLAST_SequenceBlk::compressed_nuc_seq_start, BLAST_SequenceBlk::lcase_mask, BLAST_SequenceBlk::lcase_mask_allocated, and sfree. Referenced by BOOST_AUTO_TEST_CASE(), s_MatchingSequenceRelease(), s_RPSComputeTraceback(), TestFixture::TearDownQuery(), TestFixture::TearDownSubject(), AalookupTestFixture::~AalookupTestFixture(), AascanTestFixture::~AascanTestFixture(), CMultiSeqInfo::~CMultiSeqInfo(), and CQueryFactoryInfo::~CQueryFactoryInfo(). |
|
||||||||||||||||||||
|
Allocates memory for *sequence_blk and then populates it.
Definition at line 106 of file blast_util.c. References ASSERT, BlastSeqBlkNew(), and TRUE. Referenced by AalookupTestFixture::GetSeqBlk(). |
|
|
Free an SBlastTargetTranslation object. Parameter target_t: object to be freed [in].
Definition at line 1211 of file blast_util.c. References SBlastTargetTranslation::num_frames, SBlastTargetTranslation::range, sfree, and SBlastTargetTranslation::translations. |
|
||||||||||||||||||||||||
|
Sets up structure for target translation. Parameters: subject_blk - target sequence information [in]; gen_code_string - genetic code translation information [in]; program_number - BLAST program [in]; is_ooframe - out-of-frame translation if true [in]; target - structure being set up [out]. Definition at line 1231 of file blast_util.c. References BLAST_ContextToFrame(), BLAST_GetAllTranslations(), BLAST_GetTranslation(), calloc(), eBlastEncodingNcbi4na, eBlastTypeBlastx, SBlastTargetTranslation::gen_code_string, GetReverseNuclSequence(), BLAST_SequenceBlk::length, malloc(), NUM_FRAMES, SBlastTargetTranslation::num_frames, BLAST_SequenceBlk::oof_sequence, BLAST_SequenceBlk::oof_sequence_allocated, SBlastTargetTranslation::partial, SBlastTargetTranslation::program_number, BLAST_SequenceBlk::sequence_start, sfree, SBlastTargetTranslation::translations, and TRUE. Referenced by Blast_HSPListReevaluateWithAmbiguitiesUngapped(), Blast_TracebackFromHSPList(), BOOST_AUTO_TEST_CASE(), and s_ComputeNumIdentities(). |
|
||||||||||||||||
|
The following binary search routine assumes that array A is filled.
Definition at line 1194 of file blast_util.c. |
|
||||||||||||||||
|
Reverse a nucleotide sequence in the blastna encoding, adding sentinel bytes on both ends.
Definition at line 788 of file blast_util.c. References malloc(), and NULLB. Referenced by BLAST_GetAllTranslations(), Blast_GetPartialTranslation(), Blast_HSPGetTargetTranslation(), and BlastTargetTranslationNew(). |
|
||||||||||||
|
Gets the translation array for a given genetic code. This array is optimized for the NCBI2na alphabet. The reverse complement can also be specified.
Definition at line 961 of file blast_util.c. References calloc(). Referenced by BLAST_GetAllTranslations(). |
|
|
Auxiliary function to free the BLAST_SequenceBlk::seq_ranges field if applicable.
Definition at line 95 of file blast_util.c. References ASSERT, FALSE, BLAST_SequenceBlk::num_seq_ranges, BLAST_SequenceBlk::seq_ranges, BLAST_SequenceBlk::seq_ranges_allocated, and sfree. Referenced by BlastSeqBlkSetSeqRanges(), and BlastSequenceBlkClean(). |
|
||||||||||||
|
Translate 3 nucleotides into an amino acid. MUST have 'X' as unknown amino acid.
Definition at line 351 of file blast_util.c. References AMINOACID_TO_NCBISTDAA, and FENCE_SENTRY. Referenced by BLAST_GetTranslation(). |
|
|
Deallocates a SBlastProgress structure. Implemented in blast_util.c. Definition at line 1360 of file blast_util.c. References sfree. |
|
|
Allocates and initializes a new SBlastProgress structure.
Definition at line 1350 of file blast_util.c. References calloc(), and SBlastProgress::user_data. Referenced by CBl2Seq::SetInterruptCallback(). |
|
|
Resets the progress structure to its original state (as if newly allocated) for a fresh start without touching the user_data field.
Definition at line 1369 of file blast_util.c. References ePrelimSearch, and SBlastProgress::stage. Referenced by CBl2Seq::RunFullSearch(). |
|
||||||||||||||||
|
Returns the index of the range, such that this element is the first range that either contains the target or if no such range exists, the index of the first range, such that the target is less than this range.
Definition at line 61 of file blast_util.c. Referenced by BOOST_AUTO_TEST_CASE(), and s_AdjustSubjectChunks(). |
|
||||||||||||
|
Create a new SSeqRange structure with both fields initialized.
Definition at line 52 of file blast_util.c. References SSeqRange::left, and SSeqRange::right. Referenced by BOOST_AUTO_TEST_CASE(). |
|
|
Initial value:
"$Id: blast_util.c 177349 2009-11-30 19:00:28Z coulouri $"
Definition at line 36 of file blast_util.c. |
1.4.6
Modified on Wed Dec 09 08:18:30 2009 by modify_doxy.py rev. 173732